summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2025-11-11 14:21:12 -0800
committerAmir Ayupov <aaupov@fb.com>2025-11-11 14:21:12 -0800
commita92126532654caca51465029dc759054feacec7a (patch)
tree4defcf76fb267ab365b193e8fe01da60512f35f5
parent4c3e0320a103b34d6d5570da69b2fed3d9694b12 (diff)
[𝘀𝗽𝗿] changes to main this commit is based onusers/aaupov/spr/main.bolt-optionally-turn-off-hash-based-stale-matching
Created using spr 1.3.4 [skip ci]
-rw-r--r--bolt/include/bolt/Core/BinaryContext.h12
-rw-r--r--bolt/include/bolt/Profile/ProfileYAMLMapping.h4
-rw-r--r--bolt/include/bolt/Profile/YAMLProfileReader.h117
-rw-r--r--bolt/include/bolt/Profile/YAMLProfileWriter.h8
-rw-r--r--bolt/include/bolt/Utils/CommandLineOpts.h1
-rw-r--r--bolt/lib/Passes/BinaryPasses.cpp21
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp7
-rw-r--r--bolt/lib/Profile/StaleProfileMatching.cpp418
-rw-r--r--bolt/lib/Profile/YAMLProfileReader.cpp475
-rw-r--r--bolt/lib/Profile/YAMLProfileWriter.cpp113
-rw-r--r--bolt/lib/Rewrite/PseudoProbeRewriter.cpp2
-rw-r--r--bolt/test/X86/match-blocks-with-pseudo-probes-inline.test4
-rw-r--r--bolt/test/X86/match-blocks-with-pseudo-probes.test4
-rw-r--r--bolt/test/X86/reader-stale-yaml.test4
14 files changed, 777 insertions, 413 deletions
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 2af1d330b754..ee4256314862 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -760,10 +760,8 @@ public:
uint32_t NumExactMatchedBlocks{0};
/// the number of loosely matched basic blocks
uint32_t NumLooseMatchedBlocks{0};
- /// the number of exactly pseudo probe matched basic blocks
- uint32_t NumPseudoProbeExactMatchedBlocks{0};
- /// the number of loosely pseudo probe matched basic blocks
- uint32_t NumPseudoProbeLooseMatchedBlocks{0};
+ /// the number of pseudo probe matched basic blocks
+ uint32_t NumPseudoProbeMatchedBlocks{0};
/// the number of call matched basic blocks
uint32_t NumCallMatchedBlocks{0};
/// the total count of samples in the profile
@@ -772,10 +770,8 @@ public:
uint64_t ExactMatchedSampleCount{0};
/// the count of loosely matched samples
uint64_t LooseMatchedSampleCount{0};
- /// the count of exactly pseudo probe matched samples
- uint64_t PseudoProbeExactMatchedSampleCount{0};
- /// the count of loosely pseudo probe matched samples
- uint64_t PseudoProbeLooseMatchedSampleCount{0};
+ /// the count of pseudo probe matched samples
+ uint64_t PseudoProbeMatchedSampleCount{0};
/// the count of call matched samples
uint64_t CallMatchedSampleCount{0};
} Stats;
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index b393c85321b7..509f9aedda80 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -140,6 +140,7 @@ struct BinaryBasicBlockProfile {
std::vector<CallSiteInfo> CallSites;
std::vector<SuccessorInfo> Successors;
std::vector<PseudoProbeInfo> PseudoProbes;
+ std::string PseudoProbesStr;
bool operator==(const BinaryBasicBlockProfile &Other) const {
return Index == Other.Index;
@@ -163,6 +164,7 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::SuccessorInfo>());
YamlIO.mapOptional("probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
+ YamlIO.mapOptional("probe", BBP.PseudoProbesStr, std::string());
}
};
@@ -207,6 +209,7 @@ struct BinaryFunctionProfile {
uint64_t ExternEntryCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
std::vector<InlineTreeNode> InlineTree;
+ std::string InlineTreeStr;
bool Used{false};
};
} // end namespace bolt
@@ -223,6 +226,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
std::vector<bolt::BinaryBasicBlockProfile>());
YamlIO.mapOptional("inline_tree", BFP.InlineTree,
std::vector<bolt::InlineTreeNode>());
+ YamlIO.mapOptional("ppit", BFP.InlineTreeStr, std::string());
}
};
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index 91f51f30e3ca..781b89dd8aed 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -45,9 +45,6 @@ public:
using ProfileLookupMap =
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;
- using GUIDInlineTreeMap =
- std::unordered_map<uint64_t, const MCDecodedPseudoProbeInlineTree *>;
-
/// A class for matching binary functions in functions in the YAML profile.
/// First, a call graph is constructed for both profiled and binary functions.
/// Then functions are hashed based on the names of their callee/caller
@@ -110,51 +107,40 @@ public:
// (profile) (binary)
// | blocks ^
// v |
- // yaml::bolt::BinaryBasicBlockProfile ~= FlowBlock
- // ||| probes ^ (majority vote)
- // v ||| BBPseudoProbeToBlock
- // yaml::bolt::PseudoProbeInfo MCDecodedPseudoProbe
+ // BinaryBasicBlockProfile BasicBlock
+ // | probes ^
+ // v | address
+ // PseudoProbeInfo MCDecodedPseudoProbe
// | InlineTreeIndex ^
// v | probe id
// [ profile node id (uint32_t) -> MCDecodedPseudoProbeInlineTree *]
// InlineTreeNodeMapTy
- class InlineTreeNodeMapTy {
- DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> Map;
-
- void mapInlineTreeNode(uint32_t ProfileNodeIdx,
- const MCDecodedPseudoProbeInlineTree *BinaryNode) {
- auto Res = Map.try_emplace(ProfileNodeIdx, BinaryNode);
- assert(Res.second &&
- "Duplicate mapping from profile node index to binary inline tree");
- (void)Res;
- }
-
- public:
- /// Returns matched InlineTree * for a given profile inline_tree_id.
- const MCDecodedPseudoProbeInlineTree *
- getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
- auto It = Map.find(ProfileInlineTreeNodeId);
- if (It == Map.end())
- return nullptr;
- return It->second;
- }
+ using InlineTreeNodeMapTy =
+ std::vector<const MCDecodedPseudoProbeInlineTree *>;
- // Match up \p YamlInlineTree with binary inline tree rooted at \p Root.
- // Return the number of matched nodes.
- //
- // This function populates the mapping from profile inline tree node id to a
- // corresponding binary MCDecodedPseudoProbeInlineTree node.
- size_t matchInlineTrees(
- const MCPseudoProbeDecoder &Decoder,
- const std::vector<yaml::bolt::InlineTreeNode> &YamlInlineTree,
- const MCDecodedPseudoProbeInlineTree *Root);
- };
-
- // Partial probe matching specification: matched inline tree and corresponding
- // BinaryFunctionProfile
+ // Match up \p YamlInlineTree with binary inline tree rooted at \p Root.
+ // Return the number of matched nodes.
+ //
+ // This function populates the \p Map indexed by \p YamlInlineTree from
+ // profile inline tree node id to a corresponding binary
+ // MCDecodedPseudoProbeInlineTree node on a successful match or nullptr.
+ size_t matchInlineTreesImpl(
+ BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF,
+ const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx,
+ ArrayRef<yaml::bolt::InlineTreeNode> YamlInlineTree,
+ MutableArrayRef<const MCDecodedPseudoProbeInlineTree *> Map, float Scale);
+
+ /// Support mapping the profile to the same binary function more than once.
+ /// The index is used to check and prevent recursive (backtracking) matches.
+ /// The value is a node map and scale factor, used to downscale outlined
+ /// profile when inlining it into the function.
+ using RootIdxToMapTy =
+ std::map<uint32_t, std::pair<InlineTreeNodeMapTy, float>>;
+ /// Probe matching specification: map from \p BinaryFunctionProfile to the
+ /// partial match specification \p RootIdxToMapTy.
using ProbeMatchSpec =
- std::pair<InlineTreeNodeMapTy,
- std::reference_wrapper<yaml::bolt::BinaryFunctionProfile>>;
+ std::unordered_map<const yaml::bolt::BinaryFunctionProfile *,
+ RootIdxToMapTy>;
private:
/// Adjustments for basic samples profiles (without LBR).
@@ -183,18 +169,43 @@ private:
/// Map a common LTO prefix to a set of binary functions.
StringMap<std::unordered_set<BinaryFunction *>> LTOCommonNameFunctionMap;
+ /// For pseudo probe function matching.
+ /// Set of profile GUIDs.
+ std::unordered_set<uint64_t> YamlGUIDs;
+ /// Map binary function GUID to binary function.
+ std::unordered_multimap<uint64_t, BinaryFunction *> GUIDToBF;
+
/// Function names in profile.
StringSet<> ProfileFunctionNames;
/// BinaryFunction pointers indexed by YamlBP functions.
std::vector<BinaryFunction *> ProfileBFs;
- // Pseudo probe function GUID to inline tree node
- GUIDInlineTreeMap TopLevelGUIDToInlineTree;
-
// Mapping from a binary function to its partial match specification
// (YAML profile and its inline tree mapping to binary).
- DenseMap<BinaryFunction *, std::vector<ProbeMatchSpec>> BFToProbeMatchSpecs;
+ DenseMap<BinaryFunction *, ProbeMatchSpec> BFToProbeMatchSpecs;
+
+ /// Pseudo probe matching stats.
+ struct {
+ // Inline tree root mismatch causes.
+ uint64_t MismatchingRootGUID{0};
+ uint64_t MismatchingRootHash{0};
+ // Inline tree internal node mismatch causes.
+ uint64_t MismatchingNodeHash{0};
+ uint64_t MissingProfileNode{0};
+ // Incomplete profile causes.
+ uint64_t MissingCallProbe{0};
+ uint64_t MissingCallee{0};
+ uint64_t MissingInlineTree{0};
+ uint64_t TotalCallSites{0};
+ uint64_t MissingCallCount{0};
+ uint64_t TotalCallCount{0};
+ // Total matched stats.
+ uint64_t MatchedRoots{0};
+ uint64_t MatchedNodes{0};
+ uint64_t AttemptedNodes{0};
+ uint64_t AttemptedRoots{0};
+ } ProbeMatchingStats;
/// Populate \p Function profile with the one supplied in YAML format.
bool parseFunctionProfile(BinaryFunction &Function,
@@ -207,7 +218,7 @@ private:
/// Infer function profile from stale data (collected on older binaries).
bool inferStaleProfile(BinaryFunction &Function,
const yaml::bolt::BinaryFunctionProfile &YamlBF,
- const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs);
+ const ProbeMatchSpec &ProbeMatchSpecs);
/// Initialize maps for profile matching.
void buildNameMaps(BinaryContext &BC);
@@ -242,6 +253,18 @@ private:
ProfiledFunctions.emplace(&BF);
}
+ /// Return a top-level binary inline tree node for a given \p BF
+ const MCDecodedPseudoProbeInlineTree *
+ lookupTopLevelNode(const BinaryFunction &BF);
+
+ /// Match up \p BF binary inline trees starting at root or \p Node if set, and
+ /// \p YamlBF profile starting at \p NodeIdx and record the mapping in
+ /// BFToProbeMatchSpecs.
+ void matchInlineTrees(BinaryFunction &BF,
+ const MCDecodedPseudoProbeInlineTree *Node,
+ yaml::bolt::BinaryFunctionProfile &YamlBF,
+ uint32_t NodeIdx, float Scale);
+
/// Check if the profile uses an event with a given \p Name.
bool usesEvent(StringRef Name) const;
};
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 50ee78d342df..fada73eff75e 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -36,16 +36,14 @@ public:
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
struct InlineTreeDesc {
template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
- using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
using GUIDNumMap = GUIDMapTy<uint32_t>;
- GUIDNodeMap TopLevelGUIDToInlineTree;
GUIDNumMap GUIDIdxMap;
GUIDNumMap HashIdxMap;
};
- static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
- convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
- const InlineTreeDesc &InlineTree, uint64_t GUID);
+ static InlineTreeMapTy convertBFInlineTree(
+ const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree,
+ const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF);
static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc>
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 5c7f1b94315f..33321e30a8ff 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -134,6 +134,7 @@ enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };
extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun;
+enum ProbesWriteMode : char { PWM_None = 0, PWM_Default, PWM_Compact };
} // namespace opts
namespace llvm {
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 1d187de11c35..a675f682acc1 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1555,25 +1555,14 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount,
BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount);
BC.outs() << format(
- "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of "
+ "BOLT-INFO: inference found pseudo probe match for %.2f%% of "
"basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
" (%zu out of %zu stale)\n",
- 100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks /
- BC.Stats.NumStaleBlocks,
- BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks,
- 100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount /
+ 100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks,
+ BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks,
+ 100.0 * BC.Stats.PseudoProbeMatchedSampleCount /
BC.Stats.StaleSampleCount,
- BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount);
- BC.outs() << format(
- "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of "
- "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
- " (%zu out of %zu stale)\n",
- 100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks /
- BC.Stats.NumStaleBlocks,
- BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks,
- 100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount /
- BC.Stats.StaleSampleCount,
- BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount);
+ BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount);
BC.outs() << format(
"BOLT-INFO: inference found a call match for %.2f%% of basic "
"blocks"
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 6b969011df58..b68cebd8e583 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -114,7 +114,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileWritePseudoProbes;
+extern cl::opt<ProbesWriteMode> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;
cl::opt<bool> ReadPreAggregated(
@@ -2388,9 +2388,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
InlineTreeNodeId;
if (BF->getGUID()) {
- std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
- YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
- InlineTree, BF->getGUID());
+ InlineTreeNodeId = YAMLProfileWriter::convertBFInlineTree(
+ *PseudoProbeDecoder, InlineTree, *BF, YamlBF);
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 5fb65153cf31..3d3d9384acc5 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -197,6 +197,7 @@ public:
/// release.
class StaleMatcher {
public:
+ FlowBlock *EntryBlock = nullptr;
/// Initialize stale matcher.
void init(const std::vector<FlowBlock *> &Blocks,
const std::vector<BlendedBlockHash> &Hashes,
@@ -204,6 +205,8 @@ public:
assert(Blocks.size() == Hashes.size() &&
Hashes.size() == CallHashes.size() &&
"incorrect matcher initialization");
+ if (!Blocks.empty())
+ EntryBlock = Blocks[0];
for (size_t I = 0; I < Blocks.size(); I++) {
FlowBlock *Block = Blocks[I];
uint16_t OpHash = Hashes[I].OpcodeHash;
@@ -214,47 +217,23 @@ public:
}
}
- /// Creates a mapping from a pseudo probe to a flow block.
- void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
- BBPseudoProbeToBlock[Probe] = Block;
- }
-
enum MatchMethod : char {
MATCH_EXACT = 0,
- MATCH_PROBE_EXACT,
- MATCH_PROBE_LOOSE,
MATCH_OPCODE,
MATCH_CALL,
NO_MATCH
};
- /// Find the most similar flow block for a profile block given blended hash.
+ /// Find the most similar flow block for a profile block given its hashes.
std::pair<const FlowBlock *, MatchMethod>
- matchBlockStrict(BlendedBlockHash BlendedHash) {
+ matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash) {
const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash);
if (Block && ExactHash)
return {Block, MATCH_EXACT};
- return {nullptr, NO_MATCH};
- }
-
- /// Find the most similar flow block for a profile block given pseudo probes.
- std::pair<const FlowBlock *, MatchMethod> matchBlockProbe(
- const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
- const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) {
- const auto &[ProbeBlock, ExactProbe] =
- matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap);
- if (ProbeBlock)
- return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
- return {nullptr, NO_MATCH};
- }
-
- /// Find the most similar flow block for a profile block given its hashes.
- std::pair<const FlowBlock *, MatchMethod>
- matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) {
if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash))
return {CallBlock, MATCH_CALL};
- if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first)
- return {OpcodeBlock, MATCH_OPCODE};
+ if (Block)
+ return {Block, MATCH_OPCODE};
return {nullptr, NO_MATCH};
}
@@ -270,7 +249,6 @@ private:
using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
- DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;
// Uses OpcodeHash to find the most similar block for a given hash.
std::pair<const FlowBlock *, bool>
@@ -313,57 +291,6 @@ private:
}
return BestBlock;
}
-
- /// Matches a profile block with a binary block based on pseudo probes.
- /// Returns the best matching block (or nullptr) and whether the match is
- /// unambiguous.
- std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
- const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
- const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const {
-
- if (!opts::StaleMatchingWithPseudoProbes)
- return {nullptr, false};
-
- DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
-
- auto matchProfileProbeToBlock = [&](uint32_t NodeId,
- uint64_t ProbeId) -> const FlowBlock * {
- const MCDecodedPseudoProbeInlineTree *BinaryNode =
- InlineTreeNodeMap.getInlineTreeNode(NodeId);
- if (!BinaryNode)
- return nullptr;
- const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0,
- nullptr);
- ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes();
- auto BinaryProbeIt = llvm::lower_bound(
- BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) {
- return LHS.getIndex() < RHS.getIndex();
- });
- if (BinaryProbeIt == BinaryNode->getProbes().end() ||
- BinaryProbeIt->getIndex() != ProbeId)
- return nullptr;
- auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt);
- if (It == BBPseudoProbeToBlock.end())
- return nullptr;
- return It->second;
- };
-
- for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes)
- for (uint32_t Node : ProfileProbe.InlineTreeNodes)
- for (uint64_t Probe : ProfileProbe.BlockProbes)
- ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)];
- uint32_t BestMatchCount = 0;
- uint32_t TotalMatchCount = 0;
- const FlowBlock *BestMatchBlock = nullptr;
- for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) {
- TotalMatchCount += Count;
- if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock))
- continue;
- BestMatchBlock = FlowBlock;
- BestMatchCount = Count;
- }
- return {BestMatchBlock, BestMatchCount == TotalMatchCount};
- }
};
void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
@@ -546,12 +473,18 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in CFG.
-size_t matchWeights(
- BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
- const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
- HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
- const BinaryFunction &BF,
- const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) {
+size_t matchWeights(BinaryContext &BC,
+ const BinaryFunction::BasicBlockOrderType &BlockOrder,
+ const yaml::bolt::BinaryFunctionProfile &YamlBF,
+ FlowFunction &Func, HashFunction HashFunction,
+ YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
+ const BinaryFunction &BF,
+ const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs);
+
+std::pair<StaleMatcher, std::vector<BlendedBlockHash>>
+initMatcher(BinaryContext &BC,
+ const BinaryFunction::BasicBlockOrderType &BlockOrder,
+ FlowFunction &Func, HashFunction HashFunction) {
assert(Func.Blocks.size() == BlockOrder.size() + 2);
@@ -581,46 +514,23 @@ size_t matchWeights(
<< Twine::utohexstr(BB->getHash()) << "\n");
}
StaleMatcher Matcher;
- // Collects function pseudo probes for use in the StaleMatcher.
- if (opts::StaleMatchingWithPseudoProbes) {
- const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
- assert(Decoder &&
- "If pseudo probes are in use, pseudo probe decoder should exist");
- const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
- const uint64_t FuncAddr = BF.getAddress();
- for (const MCDecodedPseudoProbe &Probe :
- ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
- if (const BinaryBasicBlock *BB =
- BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
- Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
- }
Matcher.init(Blocks, BlendedHashes, CallHashes);
+ return {Matcher, BlendedHashes};
+}
+
+using FlowBlockTy = std::pair<const FlowBlock *, float>;
+using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
- using FlowBlockTy =
- std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>;
- using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>;
- // Binary profile => block index => matched block + its block profile
- DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap>
- MatchedBlocks;
-
- // Map of FlowBlock and matching method.
- DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks;
-
- auto addMatchedBlock =
- [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod,
- const yaml::bolt::BinaryFunctionProfile &YamlBP,
- const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
- const auto &[MatchedBlock, Method] = BlockMethod;
- if (!MatchedBlock)
- return;
- // Don't override earlier matches
- if (MatchedFlowBlocks.contains(MatchedBlock))
- return;
- MatchedFlowBlocks.try_emplace(MatchedBlock, Method);
- MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB};
- };
-
- // Match blocks from the profile to the blocks in CFG by strict hash.
+// Match \p YamlBF blocks and return a mapping from block index to matched flow
+// block.
+ProfileBlockMatchMap
+matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF,
+ HashFunction HashFunction,
+ YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
+ StaleMatcher &Matcher, ArrayRef<BlendedBlockHash> BlendedHashes) {
+ ProfileBlockMatchMap MatchedBlocks;
+
+ // Match blocks from the profile to the blocks in CFG.
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
// Update matching stats.
++BC.Stats.NumStaleBlocks;
@@ -628,19 +538,6 @@ size_t matchWeights(
assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
BlendedBlockHash YamlHash(YamlBB.Hash);
- addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB);
- }
- // Match blocks from the profile to the blocks in CFG by pseudo probes.
- for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) {
- for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks)
- if (!BB.PseudoProbes.empty())
- addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap),
- YamlBP, BB);
- }
- // Match blocks from the profile to the blocks in CFG with loose methods.
- for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
- assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
- BlendedBlockHash YamlHash(YamlBB.Hash);
std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB);
uint64_t CallHash = 0;
@@ -652,9 +549,9 @@ size_t matchWeights(
else
llvm_unreachable("Unhandled HashFunction");
}
- auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash);
+ auto [MatchedBlock, Method] = Matcher.matchBlock(YamlHash, CallHash);
if (MatchedBlock == nullptr && YamlBB.Index == 0) {
- MatchedBlock = Blocks[0];
+ MatchedBlock = Matcher.EntryBlock;
// Report as loose match
Method = StaleMatcher::MATCH_OPCODE;
}
@@ -664,96 +561,89 @@ size_t matchWeights(
<< "\n");
continue;
}
- addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB);
+ MatchedBlocks[YamlBB.Index] = {MatchedBlock, 1};
+ BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
+ LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")"
+ << " with hash " << Twine::utohexstr(YamlBB.Hash)
+ << " to BB (index = " << MatchedBlock->Index - 1 << ")"
+ << " with hash " << Twine::utohexstr(BinHash.combine())
+ << "\n");
+ (void)BinHash;
+ uint64_t ExecCount = YamlBB.ExecCount;
+ // Update matching stats accounting for the matched block.
+ switch (Method) {
+ case StaleMatcher::MATCH_EXACT:
+ ++BC.Stats.NumExactMatchedBlocks;
+ BC.Stats.ExactMatchedSampleCount += ExecCount;
+ LLVM_DEBUG(dbgs() << " exact match\n");
+ break;
+ case StaleMatcher::MATCH_CALL:
+ ++BC.Stats.NumCallMatchedBlocks;
+ BC.Stats.CallMatchedSampleCount += ExecCount;
+ LLVM_DEBUG(dbgs() << " call match\n");
+ break;
+ case StaleMatcher::MATCH_OPCODE:
+ ++BC.Stats.NumLooseMatchedBlocks;
+ BC.Stats.LooseMatchedSampleCount += ExecCount;
+ LLVM_DEBUG(dbgs() << " loose match\n");
+ break;
+ case StaleMatcher::NO_MATCH:
+ LLVM_DEBUG(dbgs() << " no match\n");
+ }
}
+ return MatchedBlocks;
+}
- // Match jumps from the profile to the jumps from CFG
- std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
- std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
-
- for (const auto &[YamlBF, MatchMap] : MatchedBlocks) {
- for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) {
- const auto &[MatchedBlock, YamlBB] = FlowBlockProfile;
- StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock);
- BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
- LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")"
- << " with hash " << Twine::utohexstr(YamlBB->Hash)
- << " to BB (index = " << MatchedBlock->Index - 1 << ")"
- << " with hash " << Twine::utohexstr(BinHash.combine())
- << "\n");
- (void)BinHash;
- uint64_t ExecCount = YamlBB->ExecCount;
- // Update matching stats accounting for the matched block.
- switch (Method) {
- case StaleMatcher::MATCH_EXACT:
- ++BC.Stats.NumExactMatchedBlocks;
- BC.Stats.ExactMatchedSampleCount += ExecCount;
- LLVM_DEBUG(dbgs() << " exact match\n");
- break;
- case StaleMatcher::MATCH_PROBE_EXACT:
- ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
- BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount;
- LLVM_DEBUG(dbgs() << " exact pseudo probe match\n");
- break;
- case StaleMatcher::MATCH_PROBE_LOOSE:
- ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
- BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount;
- LLVM_DEBUG(dbgs() << " loose pseudo probe match\n");
- break;
- case StaleMatcher::MATCH_CALL:
- ++BC.Stats.NumCallMatchedBlocks;
- BC.Stats.CallMatchedSampleCount += ExecCount;
- LLVM_DEBUG(dbgs() << " call match\n");
- break;
- case StaleMatcher::MATCH_OPCODE:
- ++BC.Stats.NumLooseMatchedBlocks;
- BC.Stats.LooseMatchedSampleCount += ExecCount;
- LLVM_DEBUG(dbgs() << " loose match\n");
- break;
- case StaleMatcher::NO_MATCH:
- LLVM_DEBUG(dbgs() << " no match\n");
- }
- }
+/// Move \p YamlBF edge weights to flow function using \p MatchedBlocks, and
+/// update \p OutWeight and \p InWeight arrays.
+void transferEdgeWeights(ProfileBlockMatchMap &MatchedBlocks,
+ MutableArrayRef<uint64_t> OutWeight,
+ MutableArrayRef<uint64_t> InWeight,
+ const yaml::bolt::BinaryFunctionProfile &YamlBF) {
+ for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
+ for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
+ if (YamlSI.Count == 0)
+ continue;
- for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) {
- for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
- if (YamlSI.Count == 0)
- continue;
-
- // Try to find the jump for a given (src, dst) pair from the profile and
- // assign the jump weight based on the profile count
- const uint64_t SrcIndex = YamlBB.Index;
- const uint64_t DstIndex = YamlSI.Index;
-
- const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first;
- const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first;
-
- if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
- // Find a jump between the two blocks
- FlowJump *Jump = nullptr;
- for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
- if (SuccJump->Target == MatchedDstBlock->Index) {
- Jump = SuccJump;
- break;
- }
+ // Try to find the jump for a given (src, dst) pair from the profile and
+ // assign the jump weight based on the profile count
+ const uint64_t SrcIndex = YamlBB.Index;
+ const uint64_t DstIndex = YamlSI.Index;
+
+ const auto &[MatchedSrcBlock, SrcScale] = MatchedBlocks.lookup(SrcIndex);
+ const auto &[MatchedDstBlock, DstScale] = MatchedBlocks.lookup(DstIndex);
+
+ if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
+ // Find a jump between the two blocks
+ FlowJump *Jump = nullptr;
+ for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
+ if (SuccJump->Target == MatchedDstBlock->Index) {
+ Jump = SuccJump;
+ break;
}
- // Assign the weight, if the corresponding jump is found
- if (Jump != nullptr) {
- Jump->Weight = YamlSI.Count;
+ }
+ // Assign the weight, if the corresponding jump is found
+ if (Jump != nullptr) {
+ if (uint64_t Count = SrcScale * YamlSI.Count) {
+ Jump->Weight += Count;
Jump->HasUnknownWeight = false;
}
}
- // Assign the weight for the src block, if it is found
- if (MatchedSrcBlock != nullptr)
- OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
- // Assign the weight for the dst block, if it is found
- if (MatchedDstBlock != nullptr)
- InWeight[MatchedDstBlock->Index] += YamlSI.Count;
}
+ // Assign the weight for the src block, if it is found
+ if (MatchedSrcBlock != nullptr)
+ OutWeight[MatchedSrcBlock->Index] += SrcScale * YamlSI.Count;
+ // Assign the weight for the dst block, if it is found
+ if (MatchedDstBlock != nullptr)
+ InWeight[MatchedDstBlock->Index] += DstScale * YamlSI.Count;
}
}
+}
- // Assign block counts based on in-/out- jumps
+// Assign block counts based on in-/out- jumps
+size_t setBlockWeights(FlowFunction &Func, ArrayRef<uint64_t> OutWeight,
+ ArrayRef<uint64_t> InWeight) {
+ size_t Matched = 0;
for (FlowBlock &Block : Func.Blocks) {
if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
assert(Block.HasUnknownWeight && "unmatched block with a positive count");
@@ -761,9 +651,97 @@ size_t matchWeights(
}
Block.HasUnknownWeight = false;
Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
+ ++Matched;
+ }
+
+ return Matched;
+}
+
+/// Assign initial block/jump weights based on the stale profile data. The goal
+/// is to extract as much information from the stale profile as possible. Here
+/// we assume that each basic block is specified via a hash value computed from
+/// its content and the hashes of the unchanged basic blocks stay the same
+/// across different revisions of the binary. Blocks may also have pseudo probe
+/// information in the profile and the binary which is used for matching.
+/// Whenever there is a count in the profile with the hash corresponding to one
+/// of the basic blocks in the binary, the count is "matched" to the block.
+/// Similarly, if both the source and the target of a count in the profile are
+/// matched to a jump in the binary, the count is recorded in CFG.
+size_t matchWeights(BinaryContext &BC,
+ const BinaryFunction::BasicBlockOrderType &BlockOrder,
+ const yaml::bolt::BinaryFunctionProfile &YamlBF,
+ FlowFunction &Func, HashFunction HashFunction,
+ YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
+ const BinaryFunction &BF,
+ const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs) {
+ using namespace yaml::bolt;
+
+ assert(Func.Blocks.size() == BlockOrder.size() + 2);
+
+ auto [Matcher, BlendedHashes] =
+ initMatcher(BC, BlockOrder, Func, HashFunction);
+
+ auto getBlock = [&](const MCDecodedPseudoProbeInlineTree *Node,
+ uint64_t Id) -> const FlowBlock * {
+ if (!Node)
+ return nullptr;
+ const uint64_t Addr = BF.getAddress();
+ const MCDecodedPseudoProbe Dummy(0, Id, PseudoProbeType::Block, 0, 0,
+ nullptr);
+ ArrayRef<MCDecodedPseudoProbe> Probes = Node->getProbes();
+ auto Cmp = [](auto &L, auto &R) { return L.getIndex() < R.getIndex(); };
+ const MCDecodedPseudoProbe *Probe = llvm::lower_bound(Probes, Dummy, Cmp);
+ if (Probe == Probes.end() || Probe->getIndex() != Id)
+ return nullptr;
+ if (auto *BB = BF.getBasicBlockContainingOffset(Probe->getAddress() - Addr))
+ return &Func.Blocks[BB->getIndex() + 1];
+ return nullptr;
+ };
+
+ // Match jumps from the profile to the jumps from CFG
+ std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
+ std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
+
+ // Match blocks from the profile to the blocks in CFG by pseudo probes.
+ for (const auto &[YamlBF, StartIdxNodeMap] : ProbeMatchSpecs) {
+ // Block index => matched block + its block profile
+ ProfileBlockMatchMap MatchedBlocks;
+ for (const auto &[RootIdx, NodeMapScale] : StartIdxNodeMap) {
+ ArrayRef NodeMap = NodeMapScale.first;
+ const float Scale = NodeMapScale.second;
+ for (const BinaryBasicBlockProfile &BB : YamlBF->Blocks) {
+ auto matchProbe = [&](uint32_t Node, uint64_t Id) {
+ if (const FlowBlock *Block = getBlock(NodeMap[Node], Id)) {
+ MatchedBlocks[BB.Index] = {Block, Scale};
+ ++BC.Stats.NumPseudoProbeMatchedBlocks;
+ BC.Stats.PseudoProbeMatchedSampleCount += BB.ExecCount;
+ }
+ };
+ for (const PseudoProbeInfo &PI : BB.PseudoProbes)
+ for (uint32_t Node : PI.InlineTreeNodes)
+ for (uint64_t Id : PI.BlockProbes)
+ matchProbe(Node, Id);
+ for (const CallSiteInfo &CSI : BB.CallSites)
+ if (CSI.Probe)
+ matchProbe(CSI.InlineTreeNode, CSI.Probe);
+ }
+ }
+ ProfileBlockMatchMap StaleMatchedBlocks = matchBlocks(
+ BC, *YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes);
+ for (const auto &[Id, FlowBlockPair] : StaleMatchedBlocks)
+ // TBD: determine scaling factor
+ if (!MatchedBlocks.contains(Id))
+ MatchedBlocks[Id] = FlowBlockPair;
+
+ transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, *YamlBF);
+ }
+ if (ProbeMatchSpecs.empty()) {
+ ProfileBlockMatchMap MatchedBlocks = matchBlocks(
+ BC, YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes);
+ transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, YamlBF);
}
- return MatchedBlocks[&YamlBF].size();
+ return setBlockWeights(Func, OutWeight, InWeight);
}
/// The function finds all blocks that are (i) reachable from the Entry block
@@ -982,7 +960,7 @@ void assignProfile(BinaryFunction &BF,
bool YAMLProfileReader::inferStaleProfile(
BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF,
- const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) {
+ const ProbeMatchSpec &ProbeMatchSpecs) {
NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
"Rewrite passes", opts::TimeRewrite);
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0f87f9baec3..fdaa98f5f72e 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -9,16 +9,20 @@
#include "bolt/Profile/YAMLProfileReader.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
-#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/edit_distance.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "bolt-prof"
+
using namespace llvm;
namespace opts {
@@ -352,11 +356,7 @@ bool YAMLProfileReader::parseFunctionProfile(
if (!opts::InferStaleProfile)
return false;
- ArrayRef<ProbeMatchSpec> ProbeMatchSpecs;
- auto BFIt = BFToProbeMatchSpecs.find(&BF);
- if (BFIt != BFToProbeMatchSpecs.end())
- ProbeMatchSpecs = BFIt->second;
- ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs);
+ ProfileMatched = inferStaleProfile(BF, YamlBF, BFToProbeMatchSpecs[&BF]);
}
if (ProfileMatched)
BF.markProfiled(YamlBP.Header.Flags);
@@ -364,6 +364,34 @@ bool YAMLProfileReader::parseFunctionProfile(
return ProfileMatched;
}
+// Inline tree: decode index deltas and indirection through \p YamlPD and
+// set decoded fields (GUID, Hash, ParentIndex).
+// Probe inline tree: move InlineTreeIndex into InlineTreeNodes.
+static void
+decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD,
+ yaml::bolt::BinaryFunctionProfile &YamlBF) {
+ // Decompress inline tree
+ uint32_t ParentId = 0;
+ uint32_t PrevGUIDIdx = 0;
+ for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlBF.InlineTree) {
+ uint32_t GUIDIdx = InlineTreeNode.GUIDIndex;
+ if (GUIDIdx != UINT32_MAX)
+ PrevGUIDIdx = GUIDIdx;
+ else
+ GUIDIdx = PrevGUIDIdx;
+ uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+ ParentId += InlineTreeNode.ParentIndexDelta;
+ InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx];
+ InlineTreeNode.Hash = YamlPD.Hash[HashIdx];
+ InlineTreeNode.ParentIndexDelta = ParentId;
+ }
+ // Decompress probe descriptors
+ for (yaml::bolt::BinaryBasicBlockProfile &BB : YamlBF.Blocks) {
+ if (BB.PseudoProbesStr.empty())
+ continue;
+ }
+}
+
Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
@@ -396,8 +424,11 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
// Match profile to function based on a function name.
buildNameMaps(BC);
- // Preliminary assign function execution count.
+ // Preliminary assign function execution count and decode pseudo probe info.
+ const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
+ YamlGUIDs.insert(YamlPD.GUID.begin(), YamlPD.GUID.end());
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
+ decodeYamlInlineTree(YamlPD, YamlBF);
if (!BF)
continue;
if (!BF->hasProfile()) {
@@ -433,6 +464,22 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
return true;
}
}
+ if (opts::StaleMatchingWithPseudoProbes) {
+ SmallVector<StringRef, 0> Suffixes(
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ for (const MCSymbol *Sym : BF.getSymbols()) {
+ StringRef SymName = Sym->getName();
+ for (auto Name : {std::optional(NameResolver::restore(SymName)),
+ getCommonName(SymName, false, Suffixes)}) {
+ if (!Name)
+ continue;
+ SymName = *Name;
+ uint64_t GUID = Function::getGUIDAssumingExternalLinkage(SymName);
+ if (YamlGUIDs.count(GUID))
+ return true;
+ }
+ }
+ }
return false;
}
@@ -448,7 +495,8 @@ size_t YAMLProfileReader::matchWithExactName() {
// the profile.
Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
- if (profileMatches(YamlBF, Function)) {
+ // Allow mismatching profile when stale matching is enabled.
+ if (profileMatches(YamlBF, Function) || opts::InferStaleProfile) {
matchProfileToFunction(YamlBF, Function);
++MatchedWithExactName;
}
@@ -591,59 +639,260 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
return MatchedWithCallGraph;
}
-size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
- const MCPseudoProbeDecoder &Decoder,
- const std::vector<yaml::bolt::InlineTreeNode> &DecodedInlineTree,
- const MCDecodedPseudoProbeInlineTree *Root) {
- // Match inline tree nodes by GUID, checksum, parent, and call site.
- for (const auto &[InlineTreeNodeId, InlineTreeNode] :
- llvm::enumerate(DecodedInlineTree)) {
- uint64_t GUID = InlineTreeNode.GUID;
- uint64_t Hash = InlineTreeNode.Hash;
- uint32_t ParentId = InlineTreeNode.ParentIndexDelta;
- uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
- const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
- if (!InlineTreeNodeId) {
- Cur = Root;
- } else if (const MCDecodedPseudoProbeInlineTree *Parent =
- getInlineTreeNode(ParentId)) {
- for (const MCDecodedPseudoProbeInlineTree &Child :
- Parent->getChildren()) {
- if (Child.Guid == GUID) {
- if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
- Cur = &Child;
- break;
- }
+const MCDecodedPseudoProbeInlineTree *
+YAMLProfileReader::lookupTopLevelNode(const BinaryFunction &BF) {
+ const BinaryContext &BC = BF.getBinaryContext();
+ const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
+ assert(Decoder &&
+ "If pseudo probes are in use, pseudo probe decoder should exist");
+ uint64_t Addr = BF.getAddress();
+ uint64_t Size = BF.getSize();
+ auto Probes = Decoder->getAddress2ProbesMap().find(Addr, Addr + Size);
+ if (Probes.empty())
+ return nullptr;
+ const MCDecodedPseudoProbe &Probe = *Probes.begin();
+ const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode();
+ while (Root->hasInlineSite())
+ Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent;
+ return Root;
+}
+
+size_t YAMLProfileReader::matchInlineTreesImpl(
+ BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF,
+ const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx,
+ ArrayRef<yaml::bolt::InlineTreeNode> ProfileInlineTree,
+ MutableArrayRef<const MCDecodedPseudoProbeInlineTree *> Map, float Scale) {
+ using namespace yaml::bolt;
+ BinaryContext &BC = BF.getBinaryContext();
+ const MCPseudoProbeDecoder &Decoder = *BC.getPseudoProbeDecoder();
+ const InlineTreeNode &FuncNode = ProfileInlineTree[RootIdx];
+
+ using ChildMapTy =
+ std::unordered_map<InlineSite, const MCDecodedPseudoProbeInlineTree *,
+ InlineSiteHash>;
+ using CallSiteInfoTy =
+ std::unordered_map<InlineSite, const CallSiteInfo *, InlineSiteHash>;
+ // Mapping from a parent node id to a map InlineSite -> Child node.
+ DenseMap<uint32_t, ChildMapTy> ParentToChildren;
+ // Collect calls in the profile: map from a parent node id to a map
+ // InlineSite -> CallSiteInfo ptr.
+ DenseMap<uint32_t, CallSiteInfoTy> ParentToCSI;
+ for (const BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
+ // Collect callees for inlined profile matching, indexed by InlineSite.
+ for (const CallSiteInfo &CSI : YamlBB.CallSites) {
+ ProbeMatchingStats.TotalCallCount += CSI.Count;
+ ++ProbeMatchingStats.TotalCallSites;
+ if (CSI.Probe == 0) {
+ LLVM_DEBUG(dbgs() << "no probe for " << CSI.DestId << " " << CSI.Count
+ << '\n');
+ ++ProbeMatchingStats.MissingCallProbe;
+ ProbeMatchingStats.MissingCallCount += CSI.Count;
+ continue;
+ }
+ const BinaryFunctionProfile *Callee = IdToYamLBF.lookup(CSI.DestId);
+ if (!Callee) {
+ LLVM_DEBUG(dbgs() << "no callee for " << CSI.DestId << " " << CSI.Count
+ << '\n');
+ ++ProbeMatchingStats.MissingCallee;
+ ProbeMatchingStats.MissingCallCount += CSI.Count;
+ continue;
+ }
+ // Get callee GUID
+ if (Callee->InlineTree.empty()) {
+ LLVM_DEBUG(dbgs() << "no inline tree for " << Callee->Name << '\n');
+ ++ProbeMatchingStats.MissingInlineTree;
+ ProbeMatchingStats.MissingCallCount += CSI.Count;
+ continue;
+ }
+ uint64_t CalleeGUID = Callee->InlineTree.front().GUID;
+ ParentToCSI[CSI.InlineTreeNode][InlineSite(CalleeGUID, CSI.Probe)] = &CSI;
+ }
+ }
+ LLVM_DEBUG({
+ for (auto &[ParentId, InlineSiteCSI] : ParentToCSI) {
+ for (auto &[InlineSite, CSI] : InlineSiteCSI) {
+ auto [CalleeGUID, CallSite] = InlineSite;
+ errs() << ParentId << "@" << CallSite << "->"
+ << Twine::utohexstr(CalleeGUID) << ": " << CSI->Count << ", "
+ << Twine::utohexstr(CSI->Offset) << '\n';
+ }
+ }
+ });
+
+ assert(!Root.isRoot());
+ LLVM_DEBUG(dbgs() << "matchInlineTreesImpl for " << BF << "@"
+ << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name
+ << "@" << Twine::utohexstr(FuncNode.GUID) << '\n');
+ ++ProbeMatchingStats.AttemptedNodes;
+ ++ProbeMatchingStats.AttemptedRoots;
+
+ // Match profile function with a lead node (top-level function or inlinee)
+ if (Root.Guid != FuncNode.GUID) {
+ LLVM_DEBUG(dbgs() << "Mismatching root GUID\n");
+ ++ProbeMatchingStats.MismatchingRootGUID;
+ return 0;
+ }
+ {
+ uint64_t BinaryHash = Decoder.getFuncDescForGUID(Root.Guid)->FuncHash;
+ uint64_t ProfileHash = FuncNode.Hash;
+ if (BinaryHash != ProfileHash) {
+ LLVM_DEBUG(dbgs() << "Mismatching hashes: "
+ << Twine::utohexstr(BinaryHash) << " "
+ << Twine::utohexstr(ProfileHash) << '\n');
+ ++ProbeMatchingStats.MismatchingRootHash;
+ return 0;
+ }
+ }
+ assert(!Map[RootIdx]);
+ Map[RootIdx] = &Root;
+ ++ProbeMatchingStats.MatchedRoots;
+
+ uint64_t Matched = 1;
+ for (const auto &[Idx, ProfileNode] :
+ llvm::drop_begin(llvm::enumerate(ProfileInlineTree), RootIdx + 1)) {
+ // Can't match children if parent is not matched.
+ uint32_t ParentIdx = ProfileNode.ParentIndexDelta;
+ const MCDecodedPseudoProbeInlineTree *Parent = Map[ParentIdx];
+ // Exclude nodes with unmatched parent from attempted stats.
+ if (!Parent)
+ continue;
+ ++ProbeMatchingStats.AttemptedNodes;
+ if (!ParentToChildren.contains(ParentIdx))
+ for (const MCDecodedPseudoProbeInlineTree &Child : Parent->getChildren())
+ ParentToChildren[ParentIdx].emplace(InlineSite(Child.getInlineSite()),
+ &Child);
+ ChildMapTy &ChildMap = ParentToChildren[ParentIdx];
+ InlineSite Site(ProfileNode.GUID, ProfileNode.CallSiteProbe);
+ auto ChildIt = ChildMap.find(Site);
+ if (ChildIt == ChildMap.end()) {
+ // No binary inline tree node for a given profile inline tree node =>
+ // the function is inlined in the profile and is outlined in the binary.
+ // Look up the callee binary function and attach profile inline subtree
+ // to it. The profile will be additively included into the function.
+ LLVM_DEBUG({
+ dbgs() << "No binary inline tree for "
+ << Twine::utohexstr(ProfileNode.GUID);
+ auto G2FDMap = Decoder.getGUID2FuncDescMap();
+ auto FDIt = G2FDMap.find(ProfileNode.GUID);
+ if (FDIt != G2FDMap.end())
+ dbgs() << " " << FDIt->FuncName;
+ dbgs() << '\n';
+ });
+ for (auto It : llvm::make_range(GUIDToBF.equal_range(ProfileNode.GUID))) {
+ BinaryFunction *CalleeBF = It.second;
+ LLVM_DEBUG(dbgs() << "Found outlined function "
+ << CalleeBF->getPrintName() << '\n');
+ matchInlineTrees(*CalleeBF, nullptr, YamlBF, Idx, 1);
+ }
+ continue;
+ }
+ const MCDecodedPseudoProbeInlineTree *Child = ChildIt->second;
+ uint64_t BinaryHash = Decoder.getFuncDescForGUID(Child->Guid)->FuncHash;
+ if (BinaryHash == ProfileNode.Hash) {
+ // Match inline trees.
+ assert(!Map[Idx]);
+ Map[Idx] = Child;
+ ++Matched;
+ } else {
+ LLVM_DEBUG(dbgs() << "Mismatching hashes: "
+ << Twine::utohexstr(BinaryHash) << " "
+ << Twine::utohexstr(ProfileNode.Hash) << '\n');
+ ++ProbeMatchingStats.MismatchingNodeHash;
+ }
+ ChildMap.erase(ChildIt);
+ }
+
+ LLVM_DEBUG(dbgs() << "Matching outlined binary inline tree nodes\n");
+ // Binary inline trees without correspondence in the profile => nodes are
+ // inlined in the binary and outlined in the profile. Match function-level
+ // profiles to these nodes. The profile will be scaled down based on call
+ // site count in the parent profile.
+ for (const auto &[ParentIdx, ChildMap] : ParentToChildren) {
+ const auto &CSI = ParentToCSI[ParentIdx];
+ for (const auto &[CallSite, Node] : ChildMap) {
+ uint64_t GUID = Node->Guid;
+ assert(GUID == std::get<0>(CallSite));
+ auto ChildCSIIt = CSI.find(CallSite);
+ if (ChildCSIIt == CSI.end()) {
+ LLVM_DEBUG({
+ dbgs() << "Can't find profile call site info for "
+ << Twine::utohexstr(GUID);
+ auto G2FDMap = Decoder.getGUID2FuncDescMap();
+ auto FDIt = G2FDMap.find(GUID);
+ if (FDIt != G2FDMap.end())
+ dbgs() << " " << FDIt->FuncName << " H "
+ << Twine::utohexstr(FDIt->FuncHash);
+ dbgs() << '\n';
+ });
+ ++ProbeMatchingStats.MissingProfileNode;
+ continue;
}
+ const CallSiteInfo *ChildCSI = ChildCSIIt->second;
+ assert(ChildCSI);
+ BinaryFunctionProfile *ChildYamlBF = IdToYamLBF.lookup(ChildCSI->DestId);
+ assert(ChildYamlBF);
+ uint64_t ChildExecCount = ChildYamlBF->ExecCount;
+ uint64_t Freq = ChildCSI->Count;
+ float ChildScale = ChildExecCount ? 1.f * Freq / ChildExecCount : 1;
+ LLVM_DEBUG({
+ dbgs() << "Match inlined node " << Twine::utohexstr(GUID)
+ << " at call site " << std::get<1>(CallSite) << " in parent id "
+ << ParentIdx << " " << Twine::utohexstr(Map[ParentIdx]->Guid)
+ << " with inline exec count " << Freq
+ << " and outlined exec count " << ChildExecCount << '\n';
+ });
+ matchInlineTrees(BF, Node, *ChildYamlBF, 0, /*Scale * */ ChildScale);
}
- // Don't match nodes if the profile is stale (mismatching binary FuncHash
- // and YAML Hash)
- if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash)
- mapInlineTreeNode(InlineTreeNodeId, Cur);
}
- return Map.size();
+ LLVM_DEBUG(dbgs() << "matchInlineTreesImpl done for " << BF << "@"
+ << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name
+ << "@" << Twine::utohexstr(FuncNode.GUID) << ": " << Matched
+ << "/" << Map.size() << '\n');
+ ProbeMatchingStats.MatchedNodes += Matched;
+ return Matched;
}
-// Decode index deltas and indirection through \p YamlPD. Return modified copy
-// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex).
-static std::vector<yaml::bolt::InlineTreeNode>
-decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD,
- std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree) {
- uint32_t ParentId = 0;
- uint32_t PrevGUIDIdx = 0;
- for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
- uint32_t GUIDIdx = InlineTreeNode.GUIDIndex;
- if (GUIDIdx != UINT32_MAX)
- PrevGUIDIdx = GUIDIdx;
- else
- GUIDIdx = PrevGUIDIdx;
- uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
- ParentId += InlineTreeNode.ParentIndexDelta;
- InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx];
- InlineTreeNode.Hash = YamlPD.Hash[HashIdx];
- InlineTreeNode.ParentIndexDelta = ParentId;
+void YAMLProfileReader::matchInlineTrees(
+ BinaryFunction &BF, const MCDecodedPseudoProbeInlineTree *Node,
+ yaml::bolt::BinaryFunctionProfile &YamlBF, uint32_t RootIdx, float Scale) {
+ ArrayRef Tree = YamlBF.InlineTree;
+ size_t Size = Tree.size();
+ if (!Size)
+ return;
+ BinaryContext &BC = BF.getBinaryContext();
+ LLVM_DEBUG(dbgs() << "Match inline trees for " << BF << " @"
+ << (Node ? Twine::utohexstr(Node->Guid) : "root") << " and "
+ << YamlBF.Name << "@" << RootIdx << " with scale " << Scale
+ << '\n');
+
+ if (!Node)
+ Node = lookupTopLevelNode(BF);
+ if (!Node)
+ return;
+ RootIdxToMapTy &FuncMatchSpecs = BFToProbeMatchSpecs[&BF][&YamlBF];
+ auto MatchIt = FuncMatchSpecs.find(RootIdx);
+ if (MatchIt != FuncMatchSpecs.end()) {
+ LLVM_DEBUG(dbgs() << "Duplicate match attempt, skip\n");
+ return;
}
- return YamlInlineTree;
+ const auto &[It, _] = FuncMatchSpecs.emplace(
+ RootIdx, std::pair(InlineTreeNodeMapTy(Size), Scale));
+ MutableArrayRef Map = It->second.first;
+ size_t N = matchInlineTreesImpl(BF, YamlBF, *Node, RootIdx, Tree, Map, Scale);
+ if (N)
+ YamlBF.Used = true;
+ LLVM_DEBUG({
+ const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
+ dbgs() << N << "/" << Size << " match with " << BF << " at ";
+ // Print the inline context of head probe
+ ListSeparator LS(" inlined into ");
+ do {
+ dbgs() << LS << Decoder->getFuncDescForGUID(Node->Guid)->FuncName << ":"
+ << std::get<1>(Node->getInlineSite());
+ Node = (const MCDecodedPseudoProbeInlineTree *)Node->Parent;
+ } while (!Node->isRoot());
+ dbgs() << '\n';
+ });
}
size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
@@ -651,12 +900,15 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
return 0;
const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
- const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
// Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe
// matching.
assert(Decoder &&
"If pseudo probes are in use, pseudo probe decoder should exist");
+ for (BinaryFunction *BF : BC.getAllBinaryFunctions())
+ if (uint64_t GUID = BF->getGUID())
+ GUIDToBF.emplace(GUID, BF);
+
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
// BF is preliminary name-matched function to YamlBF
// MatchedBF is final matched function
@@ -668,22 +920,75 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
uint64_t GUID = BF->getGUID();
if (!GUID)
continue;
- auto It = TopLevelGUIDToInlineTree.find(GUID);
- if (It == TopLevelGUIDToInlineTree.end())
- continue;
- const MCDecodedPseudoProbeInlineTree *Node = It->second;
- assert(Node && "Malformed TopLevelGUIDToInlineTree");
- auto &MatchSpecs = BFToProbeMatchSpecs[BF];
- auto &InlineTreeMap =
- MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first;
- std::vector<yaml::bolt::InlineTreeNode> ProfileInlineTree =
- decodeYamlInlineTree(YamlPD, YamlBF.InlineTree);
- // Erase unsuccessful match
- if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node))
- MatchSpecs.pop_back();
- }
-
- return 0;
+ // Match YamlBF with BF at top-level inline tree level.
+ matchInlineTrees(*BF, nullptr, YamlBF, 0, 1);
+ }
+ // Remove empty match specs
+ for (auto &[BF, ProbeMatchSpecs] : BFToProbeMatchSpecs) {
+ for (auto PMII = ProbeMatchSpecs.begin(); PMII != ProbeMatchSpecs.end();) {
+ RootIdxToMapTy &RootIdxToMap = PMII->second;
+ for (auto II = RootIdxToMap.begin(); II != RootIdxToMap.end();) {
+ if (llvm::any_of(II->second.first, [](auto *P) { return P; }))
+ ++II;
+ else
+ II = RootIdxToMap.erase(II);
+ }
+ if (RootIdxToMap.empty())
+ PMII = ProbeMatchSpecs.erase(PMII);
+ else
+ ++PMII;
+ }
+ // Don't drop empty BFToProbeMatchSpecs - used during matching
+ }
+ outs() << "BOLT-INFO: pseudo probe profile matching matched "
+ << formatv("{0:p} ({1}/{2}) ",
+ 1.0 * ProbeMatchingStats.MatchedRoots /
+ ProbeMatchingStats.AttemptedRoots,
+ ProbeMatchingStats.MatchedRoots,
+ ProbeMatchingStats.AttemptedRoots)
+ << "inline tree root(s) corresponding to profile or binary functions. "
+
+ << "Couldn't match " << ProbeMatchingStats.MismatchingRootGUID
+ << " roots with mismatching GUIDs, "
+ << ProbeMatchingStats.MismatchingRootHash
+ << " with mismatching hash.\n"
+
+ << "BOLT-INFO: pseudo probe profile matching matched "
+ << formatv("{0:p} ({1}/{2}) ",
+ 1.0 * ProbeMatchingStats.MatchedNodes /
+ ProbeMatchingStats.AttemptedNodes,
+ ProbeMatchingStats.MatchedNodes,
+ ProbeMatchingStats.AttemptedNodes)
+ << "inline tree node(s) corresponding to inlined source functions. "
+
+ << "Couldn't match " << ProbeMatchingStats.MismatchingNodeHash
+ << " nodes with mismatching hash, "
+ << ProbeMatchingStats.MissingProfileNode
+ << " with missing profile node.\n"
+
+ << "BOLT-INFO: the profile has "
+ << formatv("{0:p} ({1}/{2}) calls missing probe, ",
+ 1.0 * ProbeMatchingStats.MissingCallProbe /
+ ProbeMatchingStats.TotalCallSites,
+ ProbeMatchingStats.MissingCallProbe,
+ ProbeMatchingStats.TotalCallSites)
+ << formatv("{0:p} ({1}/{2}) calls missing callee, ",
+ 1.0 * ProbeMatchingStats.MissingCallee /
+ ProbeMatchingStats.TotalCallSites,
+ ProbeMatchingStats.MissingCallee,
+ ProbeMatchingStats.TotalCallSites)
+ << formatv("{0:p} ({1}/{2}) calls with callees missing inline trees, ",
+ 1.0 * ProbeMatchingStats.MissingInlineTree /
+ ProbeMatchingStats.TotalCallSites,
+ ProbeMatchingStats.MissingInlineTree,
+ ProbeMatchingStats.TotalCallSites)
+ << formatv("covering a total of {0:p} ({1}/{2}) call counts\n",
+ 1.0 * ProbeMatchingStats.MissingCallCount /
+ ProbeMatchingStats.TotalCallCount,
+ ProbeMatchingStats.MissingCallCount,
+ ProbeMatchingStats.TotalCallCount);
+
+ return BFToProbeMatchSpecs.size();
}
size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
@@ -817,15 +1122,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}
- if (opts::StaleMatchingWithPseudoProbes) {
- const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
- assert(Decoder &&
- "If pseudo probes are in use, pseudo probe decoder should exist");
- for (const MCDecodedPseudoProbeInlineTree &TopLev :
- Decoder->getDummyInlineRoot().getChildren())
- TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
- }
-
// Map profiled function ids to names.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
IdToYamLBF[YamlBF.Id] = &YamlBF;
@@ -835,8 +1131,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
- [[maybe_unused]] const size_t MatchedWithPseudoProbes =
- matchWithPseudoProbes(BC);
+ const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC);
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
@@ -857,6 +1152,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
<< " functions with call graph\n";
+ outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes
+ << " functions with pseudo probes\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}
@@ -869,9 +1166,17 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id))
parseFunctionProfile(*BF, YamlBF);
else
- ++NumUnused;
+ NumUnused += !YamlBF.Used;
}
+ // Partial matching: pass phony profile for binary functions without
+ // corresponding real (non-partial) profile.
+ yaml::bolt::BinaryFunctionProfile Phony;
+ Phony.ExecCount = 1;
+ for (BinaryFunction *BF : llvm::make_first_range(BFToProbeMatchSpecs))
+ if (!ProfiledFunctions.count(BF))
+ parseFunctionProfile(*BF, Phony);
+
BC.setNumUnusedProfiledObjects(NumUnused);
if (opts::Lite &&
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index cd4e77b0dbb6..1667026a8e3e 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -14,6 +14,7 @@
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
@@ -25,10 +26,14 @@
namespace opts {
using namespace llvm;
extern cl::opt<bool> ProfileUseDFS;
-cl::opt<bool> ProfileWritePseudoProbes(
+cl::opt<ProbesWriteMode> ProfileWritePseudoProbes(
"profile-write-pseudo-probes",
- cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
- cl::cat(BoltOptCategory));
+ cl::desc("Write pseudo probes into YAML profile"),
+ cl::init(ProbesWriteMode::PWM_None),
+ cl::values(clEnumValN(ProbesWriteMode::PWM_Default, "", "default"),
+ clEnumValN(ProbesWriteMode::PWM_Compact, "compact",
+ "compressed encoding")),
+ cl::ValueOptional, cl::cat(BoltOutputCategory));
} // namespace opts
namespace llvm {
@@ -87,10 +92,6 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
yaml::bolt::ProfilePseudoProbeDesc Desc;
InlineTreeDesc InlineTree;
- for (const MCDecodedPseudoProbeInlineTree &TopLev :
- Decoder.getDummyInlineRoot().getChildren())
- InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
-
for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
++InlineTree.HashIdxMap[FuncDesc.FuncHash];
@@ -153,6 +154,37 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
}
};
+ auto compressVec = [](auto Vec, std::string &Out, bool Probes) {
+ if (Vec.size() == 1) {
+ if (Probes ? Vec.front() == 1 : Vec.front() == 0)
+ return;
+ }
+ // Invariant: current run is [StartGroup, EndGroup]
+ auto StartGroup = Vec.begin();
+ auto EndGroup = Vec.begin();
+ ListSeparator LS(",");
+ while (StartGroup != Vec.end()) {
+ ++EndGroup;
+ const size_t Distance = EndGroup - StartGroup;
+ const uint64_t Delta = *EndGroup - *StartGroup;
+ const bool AtEnd = EndGroup == Vec.end();
+ // Happy case: advance the group
+ if (!AtEnd && Delta == Distance)
+ continue;
+ // Otherwise print the current group
+ Out += LS;
+ APInt Int(64, *StartGroup);
+ Out += toString(Int, 36, false);
+
+ Int = APInt(64, Distance - 1, false);
+ if (Distance > 1)
+ Out += '^' + toString(Int, 36, false);
+ if (AtEnd)
+ break;
+ StartGroup = EndGroup;
+ }
+ };
+
// Check identical block probes and merge them
std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
ProbesToNodes;
@@ -160,10 +192,19 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
llvm::sort(Probes);
ProbesToNodes[Probes].emplace_back(NodeId);
}
+ ListSeparator LS(" ");
+ std::string &Probe = YamlBB.PseudoProbesStr;
for (auto &[Probes, Nodes] : ProbesToNodes) {
llvm::sort(Nodes);
- YamlBB.PseudoProbes.emplace_back(
- yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+ if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) {
+ YamlBB.PseudoProbes.emplace_back(
+ yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+ } else {
+ Probe += LS;
+ compressVec(Probes, Probe, true);
+ Probe += "_";
+ compressVec(Nodes, Probe, false);
+ }
}
for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
auto It = CallProbes.find(CSI.Offset);
@@ -187,18 +228,21 @@ void YAMLProfileWriter::BlockProbeCtx::finalize(
}
}
-std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
- YAMLProfileWriter::InlineTreeMapTy>
-YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
- const InlineTreeDesc &InlineTree,
- uint64_t GUID) {
+YAMLProfileWriter::InlineTreeMapTy YAMLProfileWriter::convertBFInlineTree(
+ const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree,
+ const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) {
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
- auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
- if (It == InlineTree.TopLevelGUIDToInlineTree.end())
- return {YamlInlineTree, InlineTreeNodeId};
- const MCDecodedPseudoProbeInlineTree *Root = It->second;
- assert(Root && "Malformed TopLevelGUIDToInlineTree");
+ std::string InlineTreeStr;
+ uint64_t Addr = BF.getAddress();
+ uint64_t Size = BF.getSize();
+ auto Probes = Decoder.getAddress2ProbesMap().find(Addr, Addr + Size);
+ if (Probes.empty())
+ return InlineTreeNodeId;
+ const MCDecodedPseudoProbe &Probe = *Probes.begin();
+ const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode();
+ while (Root->hasInlineSite())
+ Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent;
uint32_t Index = 0;
uint32_t PrevParent = 0;
uint32_t PrevGUIDIdx = 0;
@@ -215,7 +259,30 @@ YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
PrevParent = Node.ParentId;
}
- return {YamlInlineTree, InlineTreeNodeId};
+ if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) {
+ YamlBF.InlineTree = YamlInlineTree;
+ return InlineTreeNodeId;
+ }
+ assert(opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Compact);
+ // Write compact form
+ std::string &Out = YamlBF.InlineTreeStr;
+ raw_string_ostream OS(Out);
+ ListSeparator LS(" ");
+ for (const yaml::bolt::InlineTreeNode &Node : YamlInlineTree) {
+ Out += LS;
+ APInt Int(32, Node.GUIDIndex);
+ if (Node.GUIDIndex != UINT32_MAX)
+ Out += toString(Int, 36, false);
+ Out += '_';
+ Int = APInt(32, Node.ParentIndexDelta);
+ if (Node.ParentIndexDelta)
+ Out += toString(Int, 36, false);
+ Out += '_';
+ Int = APInt(32, Node.CallSiteProbe);
+ if (Node.CallSiteProbe)
+ Out += toString(Int, 36, false);
+ }
+ return InlineTreeNodeId;
}
yaml::bolt::BinaryFunctionProfile
@@ -241,8 +308,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.ExternEntryCount = BF.getExternEntryCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
- std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
- convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
+ InlineTreeNodeId =
+ convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF, YamlBF);
}
BinaryFunction::BasicBlockOrderType Order;
@@ -419,7 +486,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
// Add probe inline tree nodes.
InlineTreeDesc InlineTree;
if (const MCPseudoProbeDecoder *Decoder =
- opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
// Add all function objects.
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 947d8992890d..cc0103261560 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -50,7 +50,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));
-extern cl::opt<bool> ProfileWritePseudoProbes;
+extern cl::opt<ProbesWriteMode> ProfileWritePseudoProbes;
extern cl::opt<bool> StaleMatchingWithPseudoProbes;
} // namespace opts
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
index 9224cf163dbc..beb67277db24 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
@@ -5,8 +5,10 @@
# RUN: -o %t.bolt -data %t/yaml -infer-stale-profile -v=2 \
# RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s
+# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (3/3) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash.
+# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (6/6) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node.
# CHECK: BOLT-WARNING: 3 (100.0% of all profiled) functions have invalid (possibly stale) profile
-# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (3 out of 3 stale)
+# CHECK: BOLT-INFO: inference found pseudo probe match for 233.33% of basic blocks (7 out of 3 stale) responsible for 233.33% samples (7 out of 3 stale)
#--- yaml
---
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
index 7be327d698b1..a28fadff4e76 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -8,7 +8,9 @@
# RUN: --print-cfg --funcs=main --infer-stale-profile \
# RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s
-# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale)
+# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash.
+# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node.
+# CHECK: BOLT-INFO: inference found pseudo probe match for 100.00% of basic blocks (1 out of 1 stale)
#--- main.s
.text
diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test
index 4c6e9c25551f..23338a885d28 100644
--- a/bolt/test/X86/reader-stale-yaml.test
+++ b/bolt/test/X86/reader-stale-yaml.test
@@ -77,10 +77,10 @@ CHECK2: pre-processing profile using YAML profile reader
CHECK2: applying profile inference for "SolveCubic"
CHECK2: Matched yaml block (bid = 0) with hash 4600940a609c0000 to BB (index = 0) with hash 4600940a609c0000
CHECK2-NEXT: exact match
-CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7
-CHECK2-NEXT: loose match
CHECK2: Matched yaml block (bid = 1) with hash 167a1f084f130088 to BB (index = 1) with hash 167a1f084f130088
CHECK2-NEXT: exact match
+CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7
+CHECK2-NEXT: loose match
CHECK2: Matched yaml block (bid = 3) with hash c516000073dc00a0 to BB (index = 3) with hash c516b1c973dc00a0
CHECK2-NEXT: loose match
CHECK2: Matched yaml block (bid = 5) with hash 6446e1ea500111 to BB (index = 5) with hash 6446e1ea500111