diff options
| author | Amir Ayupov <aaupov@fb.com> | 2025-11-11 14:21:12 -0800 |
|---|---|---|
| committer | Amir Ayupov <aaupov@fb.com> | 2025-11-11 14:21:12 -0800 |
| commit | a92126532654caca51465029dc759054feacec7a (patch) | |
| tree | 4defcf76fb267ab365b193e8fe01da60512f35f5 | |
| parent | 4c3e0320a103b34d6d5570da69b2fed3d9694b12 (diff) | |
[𝘀𝗽𝗿] changes to main this commit is based onusers/aaupov/spr/main.bolt-optionally-turn-off-hash-based-stale-matching
Created using spr 1.3.4
[skip ci]
| -rw-r--r-- | bolt/include/bolt/Core/BinaryContext.h | 12 | ||||
| -rw-r--r-- | bolt/include/bolt/Profile/ProfileYAMLMapping.h | 4 | ||||
| -rw-r--r-- | bolt/include/bolt/Profile/YAMLProfileReader.h | 117 | ||||
| -rw-r--r-- | bolt/include/bolt/Profile/YAMLProfileWriter.h | 8 | ||||
| -rw-r--r-- | bolt/include/bolt/Utils/CommandLineOpts.h | 1 | ||||
| -rw-r--r-- | bolt/lib/Passes/BinaryPasses.cpp | 21 | ||||
| -rw-r--r-- | bolt/lib/Profile/DataAggregator.cpp | 7 | ||||
| -rw-r--r-- | bolt/lib/Profile/StaleProfileMatching.cpp | 418 | ||||
| -rw-r--r-- | bolt/lib/Profile/YAMLProfileReader.cpp | 475 | ||||
| -rw-r--r-- | bolt/lib/Profile/YAMLProfileWriter.cpp | 113 | ||||
| -rw-r--r-- | bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 2 | ||||
| -rw-r--r-- | bolt/test/X86/match-blocks-with-pseudo-probes-inline.test | 4 | ||||
| -rw-r--r-- | bolt/test/X86/match-blocks-with-pseudo-probes.test | 4 | ||||
| -rw-r--r-- | bolt/test/X86/reader-stale-yaml.test | 4 |
14 files changed, 777 insertions, 413 deletions
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 2af1d330b754..ee4256314862 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -760,10 +760,8 @@ public: uint32_t NumExactMatchedBlocks{0}; /// the number of loosely matched basic blocks uint32_t NumLooseMatchedBlocks{0}; - /// the number of exactly pseudo probe matched basic blocks - uint32_t NumPseudoProbeExactMatchedBlocks{0}; - /// the number of loosely pseudo probe matched basic blocks - uint32_t NumPseudoProbeLooseMatchedBlocks{0}; + /// the number of pseudo probe matched basic blocks + uint32_t NumPseudoProbeMatchedBlocks{0}; /// the number of call matched basic blocks uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile @@ -772,10 +770,8 @@ public: uint64_t ExactMatchedSampleCount{0}; /// the count of loosely matched samples uint64_t LooseMatchedSampleCount{0}; - /// the count of exactly pseudo probe matched samples - uint64_t PseudoProbeExactMatchedSampleCount{0}; - /// the count of loosely pseudo probe matched samples - uint64_t PseudoProbeLooseMatchedSampleCount{0}; + /// the count of pseudo probe matched samples + uint64_t PseudoProbeMatchedSampleCount{0}; /// the count of call matched samples uint64_t CallMatchedSampleCount{0}; } Stats; diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index b393c85321b7..509f9aedda80 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -140,6 +140,7 @@ struct BinaryBasicBlockProfile { std::vector<CallSiteInfo> CallSites; std::vector<SuccessorInfo> Successors; std::vector<PseudoProbeInfo> PseudoProbes; + std::string PseudoProbesStr; bool operator==(const BinaryBasicBlockProfile &Other) const { return Index == Other.Index; @@ -163,6 +164,7 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> { std::vector<bolt::SuccessorInfo>()); YamlIO.mapOptional("probes", BBP.PseudoProbes, std::vector<bolt::PseudoProbeInfo>()); + YamlIO.mapOptional("probe", BBP.PseudoProbesStr, std::string()); } }; @@ -207,6 +209,7 @@ struct BinaryFunctionProfile { uint64_t ExternEntryCount{0}; std::vector<BinaryBasicBlockProfile> Blocks; std::vector<InlineTreeNode> InlineTree; + std::string InlineTreeStr; bool Used{false}; }; } // end namespace bolt @@ -223,6 +226,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> { std::vector<bolt::BinaryBasicBlockProfile>()); YamlIO.mapOptional("inline_tree", BFP.InlineTree, std::vector<bolt::InlineTreeNode>()); + YamlIO.mapOptional("ppit", BFP.InlineTreeStr, std::string()); } }; diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 91f51f30e3ca..781b89dd8aed 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -45,9 +45,6 @@ public: using ProfileLookupMap = DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>; - using GUIDInlineTreeMap = - std::unordered_map<uint64_t, const MCDecodedPseudoProbeInlineTree *>; - /// A class for matching binary functions in functions in the YAML profile. /// First, a call graph is constructed for both profiled and binary functions. /// Then functions are hashed based on the names of their callee/caller @@ -110,51 +107,40 @@ public: // (profile) (binary) // | blocks ^ // v | - // yaml::bolt::BinaryBasicBlockProfile ~= FlowBlock - // ||| probes ^ (majority vote) - // v ||| BBPseudoProbeToBlock - // yaml::bolt::PseudoProbeInfo MCDecodedPseudoProbe + // BinaryBasicBlockProfile BasicBlock + // | probes ^ + // v | address + // PseudoProbeInfo MCDecodedPseudoProbe // | InlineTreeIndex ^ // v | probe id // [ profile node id (uint32_t) -> MCDecodedPseudoProbeInlineTree *] // InlineTreeNodeMapTy - class InlineTreeNodeMapTy { - DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> Map; - - void mapInlineTreeNode(uint32_t ProfileNodeIdx, - const MCDecodedPseudoProbeInlineTree *BinaryNode) { - auto Res = Map.try_emplace(ProfileNodeIdx, BinaryNode); - assert(Res.second && - "Duplicate mapping from profile node index to binary inline tree"); - (void)Res; - } - - public: - /// Returns matched InlineTree * for a given profile inline_tree_id. - const MCDecodedPseudoProbeInlineTree * - getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const { - auto It = Map.find(ProfileInlineTreeNodeId); - if (It == Map.end()) - return nullptr; - return It->second; - } + using InlineTreeNodeMapTy = + std::vector<const MCDecodedPseudoProbeInlineTree *>; - // Match up \p YamlInlineTree with binary inline tree rooted at \p Root. - // Return the number of matched nodes. - // - // This function populates the mapping from profile inline tree node id to a - // corresponding binary MCDecodedPseudoProbeInlineTree node. - size_t matchInlineTrees( - const MCPseudoProbeDecoder &Decoder, - const std::vector<yaml::bolt::InlineTreeNode> &YamlInlineTree, - const MCDecodedPseudoProbeInlineTree *Root); - }; - - // Partial probe matching specification: matched inline tree and corresponding - // BinaryFunctionProfile + // Match up \p YamlInlineTree with binary inline tree rooted at \p Root. + // Return the number of matched nodes. + // + // This function populates the \p Map indexed by \p YamlInlineTree from + // profile inline tree node id to a corresponding binary + // MCDecodedPseudoProbeInlineTree node on a successful match or nullptr. + size_t matchInlineTreesImpl( + BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF, + const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx, + ArrayRef<yaml::bolt::InlineTreeNode> YamlInlineTree, + MutableArrayRef<const MCDecodedPseudoProbeInlineTree *> Map, float Scale); + + /// Support mapping the profile to the same binary function more than once. + /// The index is used to check and prevent recursive (backtracking) matches. + /// The value is a node map and scale factor, used to downscale outlined + /// profile when inlining it into the function. + using RootIdxToMapTy = + std::map<uint32_t, std::pair<InlineTreeNodeMapTy, float>>; + /// Probe matching specification: map from \p BinaryFunctionProfile to the + /// partial match specification \p RootIdxToMapTy. using ProbeMatchSpec = - std::pair<InlineTreeNodeMapTy, - std::reference_wrapper<yaml::bolt::BinaryFunctionProfile>>; + std::unordered_map<const yaml::bolt::BinaryFunctionProfile *, + RootIdxToMapTy>; private: /// Adjustments for basic samples profiles (without LBR). @@ -183,18 +169,43 @@ private: /// Map a common LTO prefix to a set of binary functions. StringMap<std::unordered_set<BinaryFunction *>> LTOCommonNameFunctionMap; + /// For pseudo probe function matching. + /// Set of profile GUIDs. + std::unordered_set<uint64_t> YamlGUIDs; + /// Map binary function GUID to binary function. + std::unordered_multimap<uint64_t, BinaryFunction *> GUIDToBF; + /// Function names in profile. StringSet<> ProfileFunctionNames; /// BinaryFunction pointers indexed by YamlBP functions. std::vector<BinaryFunction *> ProfileBFs; - // Pseudo probe function GUID to inline tree node - GUIDInlineTreeMap TopLevelGUIDToInlineTree; - // Mapping from a binary function to its partial match specification // (YAML profile and its inline tree mapping to binary). - DenseMap<BinaryFunction *, std::vector<ProbeMatchSpec>> BFToProbeMatchSpecs; + DenseMap<BinaryFunction *, ProbeMatchSpec> BFToProbeMatchSpecs; + + /// Pseudo probe matching stats. + struct { + // Inline tree root mismatch causes. + uint64_t MismatchingRootGUID{0}; + uint64_t MismatchingRootHash{0}; + // Inline tree internal node mismatch causes. + uint64_t MismatchingNodeHash{0}; + uint64_t MissingProfileNode{0}; + // Incomplete profile causes. + uint64_t MissingCallProbe{0}; + uint64_t MissingCallee{0}; + uint64_t MissingInlineTree{0}; + uint64_t TotalCallSites{0}; + uint64_t MissingCallCount{0}; + uint64_t TotalCallCount{0}; + // Total matched stats. + uint64_t MatchedRoots{0}; + uint64_t MatchedNodes{0}; + uint64_t AttemptedNodes{0}; + uint64_t AttemptedRoots{0}; + } ProbeMatchingStats; /// Populate \p Function profile with the one supplied in YAML format. bool parseFunctionProfile(BinaryFunction &Function, @@ -207,7 +218,7 @@ private: /// Infer function profile from stale data (collected on older binaries). bool inferStaleProfile(BinaryFunction &Function, const yaml::bolt::BinaryFunctionProfile &YamlBF, - const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs); + const ProbeMatchSpec &ProbeMatchSpecs); /// Initialize maps for profile matching. void buildNameMaps(BinaryContext &BC); @@ -242,6 +253,18 @@ private: ProfiledFunctions.emplace(&BF); } + /// Return a top-level binary inline tree node for a given \p BF + const MCDecodedPseudoProbeInlineTree * + lookupTopLevelNode(const BinaryFunction &BF); + + /// Match up \p BF binary inline trees starting at root or \p Node if set, and + /// \p YamlBF profile starting at \p NodeIdx and record the mapping in + /// BFToProbeMatchSpecs. + void matchInlineTrees(BinaryFunction &BF, + const MCDecodedPseudoProbeInlineTree *Node, + yaml::bolt::BinaryFunctionProfile &YamlBF, + uint32_t NodeIdx, float Scale); + /// Check if the profile uses an event with a given \p Name. bool usesEvent(StringRef Name) const; }; diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h index 50ee78d342df..fada73eff75e 100644 --- a/bolt/include/bolt/Profile/YAMLProfileWriter.h +++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h @@ -36,16 +36,14 @@ public: DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>; struct InlineTreeDesc { template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>; - using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>; using GUIDNumMap = GUIDMapTy<uint32_t>; - GUIDNodeMap TopLevelGUIDToInlineTree; GUIDNumMap GUIDIdxMap; GUIDNumMap HashIdxMap; }; - static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy> - convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, - const InlineTreeDesc &InlineTree, uint64_t GUID); + static InlineTreeMapTy convertBFInlineTree( + const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree, + const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF); static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc> convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder); diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 5c7f1b94315f..33321e30a8ff 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -134,6 +134,7 @@ enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL }; extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun; +enum ProbesWriteMode : char { PWM_None = 0, PWM_Default, PWM_Compact }; } // namespace opts namespace llvm { diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 1d187de11c35..a675f682acc1 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1555,25 +1555,14 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( - "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " + "BOLT-INFO: inference found pseudo probe match for %.2f%% of " "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", - 100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / - BC.Stats.NumStaleBlocks, - BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, - 100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / + 100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks, + BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.PseudoProbeMatchedSampleCount / BC.Stats.StaleSampleCount, - BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); - BC.outs() << format( - "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " - "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" - " (%zu out of %zu stale)\n", - 100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / - BC.Stats.NumStaleBlocks, - BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, - 100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / - BC.Stats.StaleSampleCount, - BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( "BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 6b969011df58..b68cebd8e583 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -114,7 +114,7 @@ MaxSamples("max-samples", cl::cat(AggregatorCategory)); extern cl::opt<opts::ProfileFormatKind> ProfileFormat; -extern cl::opt<bool> ProfileWritePseudoProbes; +extern cl::opt<ProbesWriteMode> ProfileWritePseudoProbes; extern cl::opt<std::string> SaveProfile; cl::opt<bool> ReadPreAggregated( @@ -2388,9 +2388,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; if (BF->getGUID()) { - std::tie(YamlBF.InlineTree, InlineTreeNodeId) = - YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder, - InlineTree, BF->getGUID()); + InlineTreeNodeId = YAMLProfileWriter::convertBFInlineTree( + *PseudoProbeDecoder, InlineTree, *BF, YamlBF); } // Fetch probes belonging to all fragments const AddressProbesMap &ProbeMap = diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 5fb65153cf31..3d3d9384acc5 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -197,6 +197,7 @@ public: /// release. class StaleMatcher { public: + FlowBlock *EntryBlock = nullptr; /// Initialize stale matcher. void init(const std::vector<FlowBlock *> &Blocks, const std::vector<BlendedBlockHash> &Hashes, @@ -204,6 +205,8 @@ public: assert(Blocks.size() == Hashes.size() && Hashes.size() == CallHashes.size() && "incorrect matcher initialization"); + if (!Blocks.empty()) + EntryBlock = Blocks[0]; for (size_t I = 0; I < Blocks.size(); I++) { FlowBlock *Block = Blocks[I]; uint16_t OpHash = Hashes[I].OpcodeHash; @@ -214,47 +217,23 @@ public: } } - /// Creates a mapping from a pseudo probe to a flow block. - void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { - BBPseudoProbeToBlock[Probe] = Block; - } - enum MatchMethod : char { MATCH_EXACT = 0, - MATCH_PROBE_EXACT, - MATCH_PROBE_LOOSE, MATCH_OPCODE, MATCH_CALL, NO_MATCH }; - /// Find the most similar flow block for a profile block given blended hash. + /// Find the most similar flow block for a profile block given its hashes. std::pair<const FlowBlock *, MatchMethod> - matchBlockStrict(BlendedBlockHash BlendedHash) { + matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash) { const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash); if (Block && ExactHash) return {Block, MATCH_EXACT}; - return {nullptr, NO_MATCH}; - } - - /// Find the most similar flow block for a profile block given pseudo probes. - std::pair<const FlowBlock *, MatchMethod> matchBlockProbe( - const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes, - const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) { - const auto &[ProbeBlock, ExactProbe] = - matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap); - if (ProbeBlock) - return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE}; - return {nullptr, NO_MATCH}; - } - - /// Find the most similar flow block for a profile block given its hashes. - std::pair<const FlowBlock *, MatchMethod> - matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) { if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash)) return {CallBlock, MATCH_CALL}; - if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first) - return {OpcodeBlock, MATCH_OPCODE}; + if (Block) + return {Block, MATCH_OPCODE}; return {nullptr, NO_MATCH}; } @@ -270,7 +249,6 @@ private: using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>; std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks; std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks; - DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock; // Uses OpcodeHash to find the most similar block for a given hash. std::pair<const FlowBlock *, bool> @@ -313,57 +291,6 @@ private: } return BestBlock; } - - /// Matches a profile block with a binary block based on pseudo probes. - /// Returns the best matching block (or nullptr) and whether the match is - /// unambiguous. - std::pair<const FlowBlock *, bool> matchWithPseudoProbes( - const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes, - const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const { - - if (!opts::StaleMatchingWithPseudoProbes) - return {nullptr, false}; - - DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount; - - auto matchProfileProbeToBlock = [&](uint32_t NodeId, - uint64_t ProbeId) -> const FlowBlock * { - const MCDecodedPseudoProbeInlineTree *BinaryNode = - InlineTreeNodeMap.getInlineTreeNode(NodeId); - if (!BinaryNode) - return nullptr; - const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0, - nullptr); - ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes(); - auto BinaryProbeIt = llvm::lower_bound( - BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) { - return LHS.getIndex() < RHS.getIndex(); - }); - if (BinaryProbeIt == BinaryNode->getProbes().end() || - BinaryProbeIt->getIndex() != ProbeId) - return nullptr; - auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt); - if (It == BBPseudoProbeToBlock.end()) - return nullptr; - return It->second; - }; - - for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) - for (uint32_t Node : ProfileProbe.InlineTreeNodes) - for (uint64_t Probe : ProfileProbe.BlockProbes) - ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)]; - uint32_t BestMatchCount = 0; - uint32_t TotalMatchCount = 0; - const FlowBlock *BestMatchBlock = nullptr; - for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) { - TotalMatchCount += Count; - if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock)) - continue; - BestMatchBlock = FlowBlock; - BestMatchCount = Count; - } - return {BestMatchBlock, BestMatchCount == TotalMatchCount}; - } }; void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { @@ -546,12 +473,18 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -size_t matchWeights( - BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, - HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, - const BinaryFunction &BF, - const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) { +size_t matchWeights(BinaryContext &BC, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, + FlowFunction &Func, HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs); + +std::pair<StaleMatcher, std::vector<BlendedBlockHash>> +initMatcher(BinaryContext &BC, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + FlowFunction &Func, HashFunction HashFunction) { assert(Func.Blocks.size() == BlockOrder.size() + 2); @@ -581,46 +514,23 @@ size_t matchWeights( << Twine::utohexstr(BB->getHash()) << "\n"); } StaleMatcher Matcher; - // Collects function pseudo probes for use in the StaleMatcher. - if (opts::StaleMatchingWithPseudoProbes) { - const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - assert(Decoder && - "If pseudo probes are in use, pseudo probe decoder should exist"); - const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); - const uint64_t FuncAddr = BF.getAddress(); - for (const MCDecodedPseudoProbe &Probe : - ProbeMap.find(FuncAddr, FuncAddr + BF.getSize())) - if (const BinaryBasicBlock *BB = - BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr)) - Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); - } Matcher.init(Blocks, BlendedHashes, CallHashes); + return {Matcher, BlendedHashes}; +} + +using FlowBlockTy = std::pair<const FlowBlock *, float>; +using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>; - using FlowBlockTy = - std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>; - using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>; - // Binary profile => block index => matched block + its block profile - DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap> - MatchedBlocks; - - // Map of FlowBlock and matching method. - DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks; - - auto addMatchedBlock = - [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod, - const yaml::bolt::BinaryFunctionProfile &YamlBP, - const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { - const auto &[MatchedBlock, Method] = BlockMethod; - if (!MatchedBlock) - return; - // Don't override earlier matches - if (MatchedFlowBlocks.contains(MatchedBlock)) - return; - MatchedFlowBlocks.try_emplace(MatchedBlock, Method); - MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB}; - }; - - // Match blocks from the profile to the blocks in CFG by strict hash. +// Match \p YamlBF blocks and return a mapping from block index to matched flow +// block. +ProfileBlockMatchMap +matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, + HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + StaleMatcher &Matcher, ArrayRef<BlendedBlockHash> BlendedHashes) { + ProfileBlockMatchMap MatchedBlocks; + + // Match blocks from the profile to the blocks in CFG. for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { // Update matching stats. ++BC.Stats.NumStaleBlocks; @@ -628,19 +538,6 @@ size_t matchWeights( assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); BlendedBlockHash YamlHash(YamlBB.Hash); - addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB); - } - // Match blocks from the profile to the blocks in CFG by pseudo probes. - for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) { - for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks) - if (!BB.PseudoProbes.empty()) - addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap), - YamlBP, BB); - } - // Match blocks from the profile to the blocks in CFG with loose methods. - for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { - assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); - BlendedBlockHash YamlHash(YamlBB.Hash); std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB); uint64_t CallHash = 0; @@ -652,9 +549,9 @@ size_t matchWeights( else llvm_unreachable("Unhandled HashFunction"); } - auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash); + auto [MatchedBlock, Method] = Matcher.matchBlock(YamlHash, CallHash); if (MatchedBlock == nullptr && YamlBB.Index == 0) { - MatchedBlock = Blocks[0]; + MatchedBlock = Matcher.EntryBlock; // Report as loose match Method = StaleMatcher::MATCH_OPCODE; } @@ -664,96 +561,89 @@ size_t matchWeights( << "\n"); continue; } - addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB); + MatchedBlocks[YamlBB.Index] = {MatchedBlock, 1}; + BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; + LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")" + << " with hash " << Twine::utohexstr(YamlBB.Hash) + << " to BB (index = " << MatchedBlock->Index - 1 << ")" + << " with hash " << Twine::utohexstr(BinHash.combine()) + << "\n"); + (void)BinHash; + uint64_t ExecCount = YamlBB.ExecCount; + // Update matching stats accounting for the matched block. + switch (Method) { + case StaleMatcher::MATCH_EXACT: + ++BC.Stats.NumExactMatchedBlocks; + BC.Stats.ExactMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " exact match\n"); + break; + case StaleMatcher::MATCH_CALL: + ++BC.Stats.NumCallMatchedBlocks; + BC.Stats.CallMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " call match\n"); + break; + case StaleMatcher::MATCH_OPCODE: + ++BC.Stats.NumLooseMatchedBlocks; + BC.Stats.LooseMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " loose match\n"); + break; + case StaleMatcher::NO_MATCH: + LLVM_DEBUG(dbgs() << " no match\n"); + } } + return MatchedBlocks; +} - // Match jumps from the profile to the jumps from CFG - std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0); - std::vector<uint64_t> InWeight(Func.Blocks.size(), 0); - - for (const auto &[YamlBF, MatchMap] : MatchedBlocks) { - for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) { - const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; - StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); - BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; - LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" - << " with hash " << Twine::utohexstr(YamlBB->Hash) - << " to BB (index = " << MatchedBlock->Index - 1 << ")" - << " with hash " << Twine::utohexstr(BinHash.combine()) - << "\n"); - (void)BinHash; - uint64_t ExecCount = YamlBB->ExecCount; - // Update matching stats accounting for the matched block. - switch (Method) { - case StaleMatcher::MATCH_EXACT: - ++BC.Stats.NumExactMatchedBlocks; - BC.Stats.ExactMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " exact match\n"); - break; - case StaleMatcher::MATCH_PROBE_EXACT: - ++BC.Stats.NumPseudoProbeExactMatchedBlocks; - BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); - break; - case StaleMatcher::MATCH_PROBE_LOOSE: - ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; - BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); - break; - case StaleMatcher::MATCH_CALL: - ++BC.Stats.NumCallMatchedBlocks; - BC.Stats.CallMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " call match\n"); - break; - case StaleMatcher::MATCH_OPCODE: - ++BC.Stats.NumLooseMatchedBlocks; - BC.Stats.LooseMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " loose match\n"); - break; - case StaleMatcher::NO_MATCH: - LLVM_DEBUG(dbgs() << " no match\n"); - } - } +/// Move \p YamlBF edge weights to flow function using \p MatchedBlocks, and +/// update \p OutWeight and \p InWeight arrays. +void transferEdgeWeights(ProfileBlockMatchMap &MatchedBlocks, + MutableArrayRef<uint64_t> OutWeight, + MutableArrayRef<uint64_t> InWeight, + const yaml::bolt::BinaryFunctionProfile &YamlBF) { + for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { + if (YamlSI.Count == 0) + continue; - for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) { - for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { - if (YamlSI.Count == 0) - continue; - - // Try to find the jump for a given (src, dst) pair from the profile and - // assign the jump weight based on the profile count - const uint64_t SrcIndex = YamlBB.Index; - const uint64_t DstIndex = YamlSI.Index; - - const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first; - const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first; - - if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { - // Find a jump between the two blocks - FlowJump *Jump = nullptr; - for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { - if (SuccJump->Target == MatchedDstBlock->Index) { - Jump = SuccJump; - break; - } + // Try to find the jump for a given (src, dst) pair from the profile and + // assign the jump weight based on the profile count + const uint64_t SrcIndex = YamlBB.Index; + const uint64_t DstIndex = YamlSI.Index; + + const auto &[MatchedSrcBlock, SrcScale] = MatchedBlocks.lookup(SrcIndex); + const auto &[MatchedDstBlock, DstScale] = MatchedBlocks.lookup(DstIndex); + + if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { + // Find a jump between the two blocks + FlowJump *Jump = nullptr; + for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { + if (SuccJump->Target == MatchedDstBlock->Index) { + Jump = SuccJump; + break; } - // Assign the weight, if the corresponding jump is found - if (Jump != nullptr) { - Jump->Weight = YamlSI.Count; + } + // Assign the weight, if the corresponding jump is found + if (Jump != nullptr) { + if (uint64_t Count = SrcScale * YamlSI.Count) { + Jump->Weight += Count; Jump->HasUnknownWeight = false; } } - // Assign the weight for the src block, if it is found - if (MatchedSrcBlock != nullptr) - OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; - // Assign the weight for the dst block, if it is found - if (MatchedDstBlock != nullptr) - InWeight[MatchedDstBlock->Index] += YamlSI.Count; } + // Assign the weight for the src block, if it is found + if (MatchedSrcBlock != nullptr) + OutWeight[MatchedSrcBlock->Index] += SrcScale * YamlSI.Count; + // Assign the weight for the dst block, if it is found + if (MatchedDstBlock != nullptr) + InWeight[MatchedDstBlock->Index] += DstScale * YamlSI.Count; } } +} - // Assign block counts based on in-/out- jumps +// Assign block counts based on in-/out- jumps +size_t setBlockWeights(FlowFunction &Func, ArrayRef<uint64_t> OutWeight, + ArrayRef<uint64_t> InWeight) { + size_t Matched = 0; for (FlowBlock &Block : Func.Blocks) { if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) { assert(Block.HasUnknownWeight && "unmatched block with a positive count"); @@ -761,9 +651,97 @@ size_t matchWeights( } Block.HasUnknownWeight = false; Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]); + ++Matched; + } + + return Matched; +} + +/// Assign initial block/jump weights based on the stale profile data. The goal +/// is to extract as much information from the stale profile as possible. Here +/// we assume that each basic block is specified via a hash value computed from +/// its content and the hashes of the unchanged basic blocks stay the same +/// across different revisions of the binary. Blocks may also have pseudo probe +/// information in the profile and the binary which is used for matching. +/// Whenever there is a count in the profile with the hash corresponding to one +/// of the basic blocks in the binary, the count is "matched" to the block. +/// Similarly, if both the source and the target of a count in the profile are +/// matched to a jump in the binary, the count is recorded in CFG. +size_t matchWeights(BinaryContext &BC, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, + FlowFunction &Func, HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs) { + using namespace yaml::bolt; + + assert(Func.Blocks.size() == BlockOrder.size() + 2); + + auto [Matcher, BlendedHashes] = + initMatcher(BC, BlockOrder, Func, HashFunction); + + auto getBlock = [&](const MCDecodedPseudoProbeInlineTree *Node, + uint64_t Id) -> const FlowBlock * { + if (!Node) + return nullptr; + const uint64_t Addr = BF.getAddress(); + const MCDecodedPseudoProbe Dummy(0, Id, PseudoProbeType::Block, 0, 0, + nullptr); + ArrayRef<MCDecodedPseudoProbe> Probes = Node->getProbes(); + auto Cmp = [](auto &L, auto &R) { return L.getIndex() < R.getIndex(); }; + const MCDecodedPseudoProbe *Probe = llvm::lower_bound(Probes, Dummy, Cmp); + if (Probe == Probes.end() || Probe->getIndex() != Id) + return nullptr; + if (auto *BB = BF.getBasicBlockContainingOffset(Probe->getAddress() - Addr)) + return &Func.Blocks[BB->getIndex() + 1]; + return nullptr; + }; + + // Match jumps from the profile to the jumps from CFG + std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0); + std::vector<uint64_t> InWeight(Func.Blocks.size(), 0); + + // Match blocks from the profile to the blocks in CFG by pseudo probes. + for (const auto &[YamlBF, StartIdxNodeMap] : ProbeMatchSpecs) { + // Block index => matched block + its block profile + ProfileBlockMatchMap MatchedBlocks; + for (const auto &[RootIdx, NodeMapScale] : StartIdxNodeMap) { + ArrayRef NodeMap = NodeMapScale.first; + const float Scale = NodeMapScale.second; + for (const BinaryBasicBlockProfile &BB : YamlBF->Blocks) { + auto matchProbe = [&](uint32_t Node, uint64_t Id) { + if (const FlowBlock *Block = getBlock(NodeMap[Node], Id)) { + MatchedBlocks[BB.Index] = {Block, Scale}; + ++BC.Stats.NumPseudoProbeMatchedBlocks; + BC.Stats.PseudoProbeMatchedSampleCount += BB.ExecCount; + } + }; + for (const PseudoProbeInfo &PI : BB.PseudoProbes) + for (uint32_t Node : PI.InlineTreeNodes) + for (uint64_t Id : PI.BlockProbes) + matchProbe(Node, Id); + for (const CallSiteInfo &CSI : BB.CallSites) + if (CSI.Probe) + matchProbe(CSI.InlineTreeNode, CSI.Probe); + } + } + ProfileBlockMatchMap StaleMatchedBlocks = matchBlocks( + BC, *YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes); + for (const auto &[Id, FlowBlockPair] : StaleMatchedBlocks) + // TBD: determine scaling factor + if (!MatchedBlocks.contains(Id)) + MatchedBlocks[Id] = FlowBlockPair; + + transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, *YamlBF); + } + if (ProbeMatchSpecs.empty()) { + ProfileBlockMatchMap MatchedBlocks = matchBlocks( + BC, YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes); + transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, YamlBF); } - return MatchedBlocks[&YamlBF].size(); + return setBlockWeights(Func, OutWeight, InWeight); } /// The function finds all blocks that are (i) reachable from the Entry block @@ -982,7 +960,7 @@ void assignProfile(BinaryFunction &BF, bool YAMLProfileReader::inferStaleProfile( BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF, - const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) { + const ProbeMatchSpec &ProbeMatchSpecs) { NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite", "Rewrite passes", opts::TimeRewrite); diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f0f87f9baec3..fdaa98f5f72e 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -9,16 +9,20 @@ #include "bolt/Profile/YAMLProfileReader.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" -#include "bolt/Passes/MCF.h" #include "bolt/Profile/ProfileYAMLMapping.h" #include "bolt/Utils/NameResolver.h" #include "bolt/Utils/Utils.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/edit_distance.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-prof" + using namespace llvm; namespace opts { @@ -352,11 +356,7 @@ bool YAMLProfileReader::parseFunctionProfile( if (!opts::InferStaleProfile) return false; - ArrayRef<ProbeMatchSpec> ProbeMatchSpecs; - auto BFIt = BFToProbeMatchSpecs.find(&BF); - if (BFIt != BFToProbeMatchSpecs.end()) - ProbeMatchSpecs = BFIt->second; - ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs); + ProfileMatched = inferStaleProfile(BF, YamlBF, BFToProbeMatchSpecs[&BF]); } if (ProfileMatched) BF.markProfiled(YamlBP.Header.Flags); @@ -364,6 +364,34 @@ bool YAMLProfileReader::parseFunctionProfile( return ProfileMatched; } +// Inline tree: decode index deltas and indirection through \p YamlPD and +// set decoded fields (GUID, Hash, ParentIndex). +// Probe inline tree: move InlineTreeIndex into InlineTreeNodes. +static void +decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, + yaml::bolt::BinaryFunctionProfile &YamlBF) { + // Decompress inline tree + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlBF.InlineTree) { + uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; + if (GUIDIdx != UINT32_MAX) + PrevGUIDIdx = GUIDIdx; + else + GUIDIdx = PrevGUIDIdx; + uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; + ParentId += InlineTreeNode.ParentIndexDelta; + InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; + InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; + InlineTreeNode.ParentIndexDelta = ParentId; + } + // Decompress probe descriptors + for (yaml::bolt::BinaryBasicBlockProfile &BB : YamlBF.Blocks) { + if (BB.PseudoProbesStr.empty()) + continue; + } +} + Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFileOrSTDIN(Filename); @@ -396,8 +424,11 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { // Match profile to function based on a function name. buildNameMaps(BC); - // Preliminary assign function execution count. + // Preliminary assign function execution count and decode pseudo probe info. + const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; + YamlGUIDs.insert(YamlPD.GUID.begin(), YamlPD.GUID.end()); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { + decodeYamlInlineTree(YamlPD, YamlBF); if (!BF) continue; if (!BF->hasProfile()) { @@ -433,6 +464,22 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { return true; } } + if (opts::StaleMatchingWithPseudoProbes) { + SmallVector<StringRef, 0> Suffixes( + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + for (const MCSymbol *Sym : BF.getSymbols()) { + StringRef SymName = Sym->getName(); + for (auto Name : {std::optional(NameResolver::restore(SymName)), + getCommonName(SymName, false, Suffixes)}) { + if (!Name) + continue; + SymName = *Name; + uint64_t GUID = Function::getGUIDAssumingExternalLinkage(SymName); + if (YamlGUIDs.count(GUID)) + return true; + } + } + } return false; } @@ -448,7 +495,8 @@ size_t YAMLProfileReader::matchWithExactName() { // the profile. Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE); - if (profileMatches(YamlBF, Function)) { + // Allow mismatching profile when stale matching is enabled. + if (profileMatches(YamlBF, Function) || opts::InferStaleProfile) { matchProfileToFunction(YamlBF, Function); ++MatchedWithExactName; } @@ -591,59 +639,260 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } -size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( - const MCPseudoProbeDecoder &Decoder, - const std::vector<yaml::bolt::InlineTreeNode> &DecodedInlineTree, - const MCDecodedPseudoProbeInlineTree *Root) { - // Match inline tree nodes by GUID, checksum, parent, and call site. - for (const auto &[InlineTreeNodeId, InlineTreeNode] : - llvm::enumerate(DecodedInlineTree)) { - uint64_t GUID = InlineTreeNode.GUID; - uint64_t Hash = InlineTreeNode.Hash; - uint32_t ParentId = InlineTreeNode.ParentIndexDelta; - uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; - const MCDecodedPseudoProbeInlineTree *Cur = nullptr; - if (!InlineTreeNodeId) { - Cur = Root; - } else if (const MCDecodedPseudoProbeInlineTree *Parent = - getInlineTreeNode(ParentId)) { - for (const MCDecodedPseudoProbeInlineTree &Child : - Parent->getChildren()) { - if (Child.Guid == GUID) { - if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) - Cur = &Child; - break; - } +const MCDecodedPseudoProbeInlineTree * +YAMLProfileReader::lookupTopLevelNode(const BinaryFunction &BF) { + const BinaryContext &BC = BF.getBinaryContext(); + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); + uint64_t Addr = BF.getAddress(); + uint64_t Size = BF.getSize(); + auto Probes = Decoder->getAddress2ProbesMap().find(Addr, Addr + Size); + if (Probes.empty()) + return nullptr; + const MCDecodedPseudoProbe &Probe = *Probes.begin(); + const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode(); + while (Root->hasInlineSite()) + Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent; + return Root; +} + +size_t YAMLProfileReader::matchInlineTreesImpl( + BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF, + const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx, + ArrayRef<yaml::bolt::InlineTreeNode> ProfileInlineTree, + MutableArrayRef<const MCDecodedPseudoProbeInlineTree *> Map, float Scale) { + using namespace yaml::bolt; + BinaryContext &BC = BF.getBinaryContext(); + const MCPseudoProbeDecoder &Decoder = *BC.getPseudoProbeDecoder(); + const InlineTreeNode &FuncNode = ProfileInlineTree[RootIdx]; + + using ChildMapTy = + std::unordered_map<InlineSite, const MCDecodedPseudoProbeInlineTree *, + InlineSiteHash>; + using CallSiteInfoTy = + std::unordered_map<InlineSite, const CallSiteInfo *, InlineSiteHash>; + // Mapping from a parent node id to a map InlineSite -> Child node. + DenseMap<uint32_t, ChildMapTy> ParentToChildren; + // Collect calls in the profile: map from a parent node id to a map + // InlineSite -> CallSiteInfo ptr. + DenseMap<uint32_t, CallSiteInfoTy> ParentToCSI; + for (const BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + // Collect callees for inlined profile matching, indexed by InlineSite. + for (const CallSiteInfo &CSI : YamlBB.CallSites) { + ProbeMatchingStats.TotalCallCount += CSI.Count; + ++ProbeMatchingStats.TotalCallSites; + if (CSI.Probe == 0) { + LLVM_DEBUG(dbgs() << "no probe for " << CSI.DestId << " " << CSI.Count + << '\n'); + ++ProbeMatchingStats.MissingCallProbe; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + const BinaryFunctionProfile *Callee = IdToYamLBF.lookup(CSI.DestId); + if (!Callee) { + LLVM_DEBUG(dbgs() << "no callee for " << CSI.DestId << " " << CSI.Count + << '\n'); + ++ProbeMatchingStats.MissingCallee; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + // Get callee GUID + if (Callee->InlineTree.empty()) { + LLVM_DEBUG(dbgs() << "no inline tree for " << Callee->Name << '\n'); + ++ProbeMatchingStats.MissingInlineTree; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + uint64_t CalleeGUID = Callee->InlineTree.front().GUID; + ParentToCSI[CSI.InlineTreeNode][InlineSite(CalleeGUID, CSI.Probe)] = &CSI; + } + } + LLVM_DEBUG({ + for (auto &[ParentId, InlineSiteCSI] : ParentToCSI) { + for (auto &[InlineSite, CSI] : InlineSiteCSI) { + auto [CalleeGUID, CallSite] = InlineSite; + errs() << ParentId << "@" << CallSite << "->" + << Twine::utohexstr(CalleeGUID) << ": " << CSI->Count << ", " + << Twine::utohexstr(CSI->Offset) << '\n'; + } + } + }); + + assert(!Root.isRoot()); + LLVM_DEBUG(dbgs() << "matchInlineTreesImpl for " << BF << "@" + << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name + << "@" << Twine::utohexstr(FuncNode.GUID) << '\n'); + ++ProbeMatchingStats.AttemptedNodes; + ++ProbeMatchingStats.AttemptedRoots; + + // Match profile function with a lead node (top-level function or inlinee) + if (Root.Guid != FuncNode.GUID) { + LLVM_DEBUG(dbgs() << "Mismatching root GUID\n"); + ++ProbeMatchingStats.MismatchingRootGUID; + return 0; + } + { + uint64_t BinaryHash = Decoder.getFuncDescForGUID(Root.Guid)->FuncHash; + uint64_t ProfileHash = FuncNode.Hash; + if (BinaryHash != ProfileHash) { + LLVM_DEBUG(dbgs() << "Mismatching hashes: " + << Twine::utohexstr(BinaryHash) << " " + << Twine::utohexstr(ProfileHash) << '\n'); + ++ProbeMatchingStats.MismatchingRootHash; + return 0; + } + } + assert(!Map[RootIdx]); + Map[RootIdx] = &Root; + ++ProbeMatchingStats.MatchedRoots; + + uint64_t Matched = 1; + for (const auto &[Idx, ProfileNode] : + llvm::drop_begin(llvm::enumerate(ProfileInlineTree), RootIdx + 1)) { + // Can't match children if parent is not matched. + uint32_t ParentIdx = ProfileNode.ParentIndexDelta; + const MCDecodedPseudoProbeInlineTree *Parent = Map[ParentIdx]; + // Exclude nodes with unmatched parent from attempted stats. + if (!Parent) + continue; + ++ProbeMatchingStats.AttemptedNodes; + if (!ParentToChildren.contains(ParentIdx)) + for (const MCDecodedPseudoProbeInlineTree &Child : Parent->getChildren()) + ParentToChildren[ParentIdx].emplace(InlineSite(Child.getInlineSite()), + &Child); + ChildMapTy &ChildMap = ParentToChildren[ParentIdx]; + InlineSite Site(ProfileNode.GUID, ProfileNode.CallSiteProbe); + auto ChildIt = ChildMap.find(Site); + if (ChildIt == ChildMap.end()) { + // No binary inline tree node for a given profile inline tree node => + // the function is inlined in the profile and is outlined in the binary. + // Look up the callee binary function and attach profile inline subtree + // to it. The profile will be additively included into the function. + LLVM_DEBUG({ + dbgs() << "No binary inline tree for " + << Twine::utohexstr(ProfileNode.GUID); + auto G2FDMap = Decoder.getGUID2FuncDescMap(); + auto FDIt = G2FDMap.find(ProfileNode.GUID); + if (FDIt != G2FDMap.end()) + dbgs() << " " << FDIt->FuncName; + dbgs() << '\n'; + }); + for (auto It : llvm::make_range(GUIDToBF.equal_range(ProfileNode.GUID))) { + BinaryFunction *CalleeBF = It.second; + LLVM_DEBUG(dbgs() << "Found outlined function " + << CalleeBF->getPrintName() << '\n'); + matchInlineTrees(*CalleeBF, nullptr, YamlBF, Idx, 1); + } + continue; + } + const MCDecodedPseudoProbeInlineTree *Child = ChildIt->second; + uint64_t BinaryHash = Decoder.getFuncDescForGUID(Child->Guid)->FuncHash; + if (BinaryHash == ProfileNode.Hash) { + // Match inline trees. + assert(!Map[Idx]); + Map[Idx] = Child; + ++Matched; + } else { + LLVM_DEBUG(dbgs() << "Mismatching hashes: " + << Twine::utohexstr(BinaryHash) << " " + << Twine::utohexstr(ProfileNode.Hash) << '\n'); + ++ProbeMatchingStats.MismatchingNodeHash; + } + ChildMap.erase(ChildIt); + } + + LLVM_DEBUG(dbgs() << "Matching outlined binary inline tree nodes\n"); + // Binary inline trees without correspondence in the profile => nodes are + // inlined in the binary and outlined in the profile. Match function-level + // profiles to these nodes. The profile will be scaled down based on call + // site count in the parent profile. + for (const auto &[ParentIdx, ChildMap] : ParentToChildren) { + const auto &CSI = ParentToCSI[ParentIdx]; + for (const auto &[CallSite, Node] : ChildMap) { + uint64_t GUID = Node->Guid; + assert(GUID == std::get<0>(CallSite)); + auto ChildCSIIt = CSI.find(CallSite); + if (ChildCSIIt == CSI.end()) { + LLVM_DEBUG({ + dbgs() << "Can't find profile call site info for " + << Twine::utohexstr(GUID); + auto G2FDMap = Decoder.getGUID2FuncDescMap(); + auto FDIt = G2FDMap.find(GUID); + if (FDIt != G2FDMap.end()) + dbgs() << " " << FDIt->FuncName << " H " + << Twine::utohexstr(FDIt->FuncHash); + dbgs() << '\n'; + }); + ++ProbeMatchingStats.MissingProfileNode; + continue; } + const CallSiteInfo *ChildCSI = ChildCSIIt->second; + assert(ChildCSI); + BinaryFunctionProfile *ChildYamlBF = IdToYamLBF.lookup(ChildCSI->DestId); + assert(ChildYamlBF); + uint64_t ChildExecCount = ChildYamlBF->ExecCount; + uint64_t Freq = ChildCSI->Count; + float ChildScale = ChildExecCount ? 1.f * Freq / ChildExecCount : 1; + LLVM_DEBUG({ + dbgs() << "Match inlined node " << Twine::utohexstr(GUID) + << " at call site " << std::get<1>(CallSite) << " in parent id " + << ParentIdx << " " << Twine::utohexstr(Map[ParentIdx]->Guid) + << " with inline exec count " << Freq + << " and outlined exec count " << ChildExecCount << '\n'; + }); + matchInlineTrees(BF, Node, *ChildYamlBF, 0, /*Scale * */ ChildScale); } - // Don't match nodes if the profile is stale (mismatching binary FuncHash - // and YAML Hash) - if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash) - mapInlineTreeNode(InlineTreeNodeId, Cur); } - return Map.size(); + LLVM_DEBUG(dbgs() << "matchInlineTreesImpl done for " << BF << "@" + << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name + << "@" << Twine::utohexstr(FuncNode.GUID) << ": " << Matched + << "/" << Map.size() << '\n'); + ProbeMatchingStats.MatchedNodes += Matched; + return Matched; } -// Decode index deltas and indirection through \p YamlPD. Return modified copy -// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex). -static std::vector<yaml::bolt::InlineTreeNode> -decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, - std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree) { - uint32_t ParentId = 0; - uint32_t PrevGUIDIdx = 0; - for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { - uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; - if (GUIDIdx != UINT32_MAX) - PrevGUIDIdx = GUIDIdx; - else - GUIDIdx = PrevGUIDIdx; - uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; - ParentId += InlineTreeNode.ParentIndexDelta; - InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; - InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; - InlineTreeNode.ParentIndexDelta = ParentId; +void YAMLProfileReader::matchInlineTrees( + BinaryFunction &BF, const MCDecodedPseudoProbeInlineTree *Node, + yaml::bolt::BinaryFunctionProfile &YamlBF, uint32_t RootIdx, float Scale) { + ArrayRef Tree = YamlBF.InlineTree; + size_t Size = Tree.size(); + if (!Size) + return; + BinaryContext &BC = BF.getBinaryContext(); + LLVM_DEBUG(dbgs() << "Match inline trees for " << BF << " @" + << (Node ? Twine::utohexstr(Node->Guid) : "root") << " and " + << YamlBF.Name << "@" << RootIdx << " with scale " << Scale + << '\n'); + + if (!Node) + Node = lookupTopLevelNode(BF); + if (!Node) + return; + RootIdxToMapTy &FuncMatchSpecs = BFToProbeMatchSpecs[&BF][&YamlBF]; + auto MatchIt = FuncMatchSpecs.find(RootIdx); + if (MatchIt != FuncMatchSpecs.end()) { + LLVM_DEBUG(dbgs() << "Duplicate match attempt, skip\n"); + return; } - return YamlInlineTree; + const auto &[It, _] = FuncMatchSpecs.emplace( + RootIdx, std::pair(InlineTreeNodeMapTy(Size), Scale)); + MutableArrayRef Map = It->second.first; + size_t N = matchInlineTreesImpl(BF, YamlBF, *Node, RootIdx, Tree, Map, Scale); + if (N) + YamlBF.Used = true; + LLVM_DEBUG({ + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + dbgs() << N << "/" << Size << " match with " << BF << " at "; + // Print the inline context of head probe + ListSeparator LS(" inlined into "); + do { + dbgs() << LS << Decoder->getFuncDescForGUID(Node->Guid)->FuncName << ":" + << std::get<1>(Node->getInlineSite()); + Node = (const MCDecodedPseudoProbeInlineTree *)Node->Parent; + } while (!Node->isRoot()); + dbgs() << '\n'; + }); } size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { @@ -651,12 +900,15 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { return 0; const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe // matching. assert(Decoder && "If pseudo probes are in use, pseudo probe decoder should exist"); + for (BinaryFunction *BF : BC.getAllBinaryFunctions()) + if (uint64_t GUID = BF->getGUID()) + GUIDToBF.emplace(GUID, BF); + for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { // BF is preliminary name-matched function to YamlBF // MatchedBF is final matched function @@ -668,22 +920,75 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t GUID = BF->getGUID(); if (!GUID) continue; - auto It = TopLevelGUIDToInlineTree.find(GUID); - if (It == TopLevelGUIDToInlineTree.end()) - continue; - const MCDecodedPseudoProbeInlineTree *Node = It->second; - assert(Node && "Malformed TopLevelGUIDToInlineTree"); - auto &MatchSpecs = BFToProbeMatchSpecs[BF]; - auto &InlineTreeMap = - MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first; - std::vector<yaml::bolt::InlineTreeNode> ProfileInlineTree = - decodeYamlInlineTree(YamlPD, YamlBF.InlineTree); - // Erase unsuccessful match - if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node)) - MatchSpecs.pop_back(); - } - - return 0; + // Match YamlBF with BF at top-level inline tree level. + matchInlineTrees(*BF, nullptr, YamlBF, 0, 1); + } + // Remove empty match specs + for (auto &[BF, ProbeMatchSpecs] : BFToProbeMatchSpecs) { + for (auto PMII = ProbeMatchSpecs.begin(); PMII != ProbeMatchSpecs.end();) { + RootIdxToMapTy &RootIdxToMap = PMII->second; + for (auto II = RootIdxToMap.begin(); II != RootIdxToMap.end();) { + if (llvm::any_of(II->second.first, [](auto *P) { return P; })) + ++II; + else + II = RootIdxToMap.erase(II); + } + if (RootIdxToMap.empty()) + PMII = ProbeMatchSpecs.erase(PMII); + else + ++PMII; + } + // Don't drop empty BFToProbeMatchSpecs - used during matching + } + outs() << "BOLT-INFO: pseudo probe profile matching matched " + << formatv("{0:p} ({1}/{2}) ", + 1.0 * ProbeMatchingStats.MatchedRoots / + ProbeMatchingStats.AttemptedRoots, + ProbeMatchingStats.MatchedRoots, + ProbeMatchingStats.AttemptedRoots) + << "inline tree root(s) corresponding to profile or binary functions. " + + << "Couldn't match " << ProbeMatchingStats.MismatchingRootGUID + << " roots with mismatching GUIDs, " + << ProbeMatchingStats.MismatchingRootHash + << " with mismatching hash.\n" + + << "BOLT-INFO: pseudo probe profile matching matched " + << formatv("{0:p} ({1}/{2}) ", + 1.0 * ProbeMatchingStats.MatchedNodes / + ProbeMatchingStats.AttemptedNodes, + ProbeMatchingStats.MatchedNodes, + ProbeMatchingStats.AttemptedNodes) + << "inline tree node(s) corresponding to inlined source functions. " + + << "Couldn't match " << ProbeMatchingStats.MismatchingNodeHash + << " nodes with mismatching hash, " + << ProbeMatchingStats.MissingProfileNode + << " with missing profile node.\n" + + << "BOLT-INFO: the profile has " + << formatv("{0:p} ({1}/{2}) calls missing probe, ", + 1.0 * ProbeMatchingStats.MissingCallProbe / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingCallProbe, + ProbeMatchingStats.TotalCallSites) + << formatv("{0:p} ({1}/{2}) calls missing callee, ", + 1.0 * ProbeMatchingStats.MissingCallee / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingCallee, + ProbeMatchingStats.TotalCallSites) + << formatv("{0:p} ({1}/{2}) calls with callees missing inline trees, ", + 1.0 * ProbeMatchingStats.MissingInlineTree / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingInlineTree, + ProbeMatchingStats.TotalCallSites) + << formatv("covering a total of {0:p} ({1}/{2}) call counts\n", + 1.0 * ProbeMatchingStats.MissingCallCount / + ProbeMatchingStats.TotalCallCount, + ProbeMatchingStats.MissingCallCount, + ProbeMatchingStats.TotalCallCount); + + return BFToProbeMatchSpecs.size(); } size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { @@ -817,15 +1122,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } } - if (opts::StaleMatchingWithPseudoProbes) { - const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - assert(Decoder && - "If pseudo probes are in use, pseudo probe decoder should exist"); - for (const MCDecodedPseudoProbeInlineTree &TopLev : - Decoder->getDummyInlineRoot().getChildren()) - TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; - } - // Map profiled function ids to names. for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) IdToYamLBF[YamlBF.Id] = &YamlBF; @@ -835,8 +1131,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); - [[maybe_unused]] const size_t MatchedWithPseudoProbes = - matchWithPseudoProbes(BC); + const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) @@ -857,6 +1152,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { << " functions with matching LTO common names\n"; outs() << "BOLT-INFO: matched " << MatchedWithCallGraph << " functions with call graph\n"; + outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes + << " functions with pseudo probes\n"; outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity << " functions with similar names\n"; } @@ -869,9 +1166,17 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id)) parseFunctionProfile(*BF, YamlBF); else - ++NumUnused; + NumUnused += !YamlBF.Used; } + // Partial matching: pass phony profile for binary functions without + // corresponding real (non-partial) profile. + yaml::bolt::BinaryFunctionProfile Phony; + Phony.ExecCount = 1; + for (BinaryFunction *BF : llvm::make_first_range(BFToProbeMatchSpecs)) + if (!ProfiledFunctions.count(BF)) + parseFunctionProfile(*BF, Phony); + BC.setNumUnusedProfiledObjects(NumUnused); if (opts::Lite && diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index cd4e77b0dbb6..1667026a8e3e 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -14,6 +14,7 @@ #include "bolt/Profile/ProfileReaderBase.h" #include "bolt/Rewrite/RewriteInstance.h" #include "bolt/Utils/CommandLineOpts.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -25,10 +26,14 @@ namespace opts { using namespace llvm; extern cl::opt<bool> ProfileUseDFS; -cl::opt<bool> ProfileWritePseudoProbes( +cl::opt<ProbesWriteMode> ProfileWritePseudoProbes( "profile-write-pseudo-probes", - cl::desc("Use pseudo probes in profile generation"), cl::Hidden, - cl::cat(BoltOptCategory)); + cl::desc("Write pseudo probes into YAML profile"), + cl::init(ProbesWriteMode::PWM_None), + cl::values(clEnumValN(ProbesWriteMode::PWM_Default, "", "default"), + clEnumValN(ProbesWriteMode::PWM_Compact, "compact", + "compressed encoding")), + cl::ValueOptional, cl::cat(BoltOutputCategory)); } // namespace opts namespace llvm { @@ -87,10 +92,6 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { yaml::bolt::ProfilePseudoProbeDesc Desc; InlineTreeDesc InlineTree; - for (const MCDecodedPseudoProbeInlineTree &TopLev : - Decoder.getDummyInlineRoot().getChildren()) - InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; - for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap()) ++InlineTree.HashIdxMap[FuncDesc.FuncHash]; @@ -153,6 +154,37 @@ void YAMLProfileWriter::BlockProbeCtx::finalize( } }; + auto compressVec = [](auto Vec, std::string &Out, bool Probes) { + if (Vec.size() == 1) { + if (Probes ? Vec.front() == 1 : Vec.front() == 0) + return; + } + // Invariant: current run is [StartGroup, EndGroup] + auto StartGroup = Vec.begin(); + auto EndGroup = Vec.begin(); + ListSeparator LS(","); + while (StartGroup != Vec.end()) { + ++EndGroup; + const size_t Distance = EndGroup - StartGroup; + const uint64_t Delta = *EndGroup - *StartGroup; + const bool AtEnd = EndGroup == Vec.end(); + // Happy case: advance the group + if (!AtEnd && Delta == Distance) + continue; + // Otherwise print the current group + Out += LS; + APInt Int(64, *StartGroup); + Out += toString(Int, 36, false); + + Int = APInt(64, Distance - 1, false); + if (Distance > 1) + Out += '^' + toString(Int, 36, false); + if (AtEnd) + break; + StartGroup = EndGroup; + } + }; + // Check identical block probes and merge them std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher> ProbesToNodes; @@ -160,10 +192,19 @@ void YAMLProfileWriter::BlockProbeCtx::finalize( llvm::sort(Probes); ProbesToNodes[Probes].emplace_back(NodeId); } + ListSeparator LS(" "); + std::string &Probe = YamlBB.PseudoProbesStr; for (auto &[Probes, Nodes] : ProbesToNodes) { llvm::sort(Nodes); - YamlBB.PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probes, Nodes}); + if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) { + YamlBB.PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probes, Nodes}); + } else { + Probe += LS; + compressVec(Probes, Probe, true); + Probe += "_"; + compressVec(Nodes, Probe, false); + } } for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) { auto It = CallProbes.find(CSI.Offset); @@ -187,18 +228,21 @@ void YAMLProfileWriter::BlockProbeCtx::finalize( } } -std::tuple<std::vector<yaml::bolt::InlineTreeNode>, - YAMLProfileWriter::InlineTreeMapTy> -YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, - const InlineTreeDesc &InlineTree, - uint64_t GUID) { +YAMLProfileWriter::InlineTreeMapTy YAMLProfileWriter::convertBFInlineTree( + const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree, + const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) { DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree; - auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID); - if (It == InlineTree.TopLevelGUIDToInlineTree.end()) - return {YamlInlineTree, InlineTreeNodeId}; - const MCDecodedPseudoProbeInlineTree *Root = It->second; - assert(Root && "Malformed TopLevelGUIDToInlineTree"); + std::string InlineTreeStr; + uint64_t Addr = BF.getAddress(); + uint64_t Size = BF.getSize(); + auto Probes = Decoder.getAddress2ProbesMap().find(Addr, Addr + Size); + if (Probes.empty()) + return InlineTreeNodeId; + const MCDecodedPseudoProbe &Probe = *Probes.begin(); + const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode(); + while (Root->hasInlineSite()) + Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent; uint32_t Index = 0; uint32_t PrevParent = 0; uint32_t PrevGUIDIdx = 0; @@ -215,7 +259,30 @@ YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0}); PrevParent = Node.ParentId; } - return {YamlInlineTree, InlineTreeNodeId}; + if (opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Default) { + YamlBF.InlineTree = YamlInlineTree; + return InlineTreeNodeId; + } + assert(opts::ProfileWritePseudoProbes == opts::ProbesWriteMode::PWM_Compact); + // Write compact form + std::string &Out = YamlBF.InlineTreeStr; + raw_string_ostream OS(Out); + ListSeparator LS(" "); + for (const yaml::bolt::InlineTreeNode &Node : YamlInlineTree) { + Out += LS; + APInt Int(32, Node.GUIDIndex); + if (Node.GUIDIndex != UINT32_MAX) + Out += toString(Int, 36, false); + Out += '_'; + Int = APInt(32, Node.ParentIndexDelta); + if (Node.ParentIndexDelta) + Out += toString(Int, 36, false); + Out += '_'; + Int = APInt(32, Node.CallSiteProbe); + if (Node.CallSiteProbe) + Out += toString(Int, 36, false); + } + return InlineTreeNodeId; } yaml::bolt::BinaryFunctionProfile @@ -241,8 +308,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, YamlBF.ExternEntryCount = BF.getExternEntryCount(); DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; if (PseudoProbeDecoder && BF.getGUID()) { - std::tie(YamlBF.InlineTree, InlineTreeNodeId) = - convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID()); + InlineTreeNodeId = + convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF, YamlBF); } BinaryFunction::BasicBlockOrderType Order; @@ -419,7 +486,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) { // Add probe inline tree nodes. InlineTreeDesc InlineTree; if (const MCPseudoProbeDecoder *Decoder = - opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr) + opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr) std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder); // Add all function objects. diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 947d8992890d..cc0103261560 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -50,7 +50,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( clEnumValN(PPP_All, "all", "enable all debugging printout")), cl::Hidden, cl::cat(BoltCategory)); -extern cl::opt<bool> ProfileWritePseudoProbes; +extern cl::opt<ProbesWriteMode> ProfileWritePseudoProbes; extern cl::opt<bool> StaleMatchingWithPseudoProbes; } // namespace opts diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test index 9224cf163dbc..beb67277db24 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test @@ -5,8 +5,10 @@ # RUN: -o %t.bolt -data %t/yaml -infer-stale-profile -v=2 \ # RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (3/3) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash. +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (6/6) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node. # CHECK: BOLT-WARNING: 3 (100.0% of all profiled) functions have invalid (possibly stale) profile -# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (3 out of 3 stale) +# CHECK: BOLT-INFO: inference found pseudo probe match for 233.33% of basic blocks (7 out of 3 stale) responsible for 233.33% samples (7 out of 3 stale) #--- yaml --- diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test index 7be327d698b1..a28fadff4e76 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test @@ -8,7 +8,9 @@ # RUN: --print-cfg --funcs=main --infer-stale-profile \ # RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s -# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash. +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node. +# CHECK: BOLT-INFO: inference found pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) #--- main.s .text diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test index 4c6e9c25551f..23338a885d28 100644 --- a/bolt/test/X86/reader-stale-yaml.test +++ b/bolt/test/X86/reader-stale-yaml.test @@ -77,10 +77,10 @@ CHECK2: pre-processing profile using YAML profile reader CHECK2: applying profile inference for "SolveCubic" CHECK2: Matched yaml block (bid = 0) with hash 4600940a609c0000 to BB (index = 0) with hash 4600940a609c0000 CHECK2-NEXT: exact match -CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7 -CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 1) with hash 167a1f084f130088 to BB (index = 1) with hash 167a1f084f130088 CHECK2-NEXT: exact match +CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7 +CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 3) with hash c516000073dc00a0 to BB (index = 3) with hash c516b1c973dc00a0 CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 5) with hash 6446e1ea500111 to BB (index = 5) with hash 6446e1ea500111 |
