diff options
| author | Amir Ayupov <aaupov@fb.com> | 2025-05-01 12:00:19 -0700 |
|---|---|---|
| committer | Amir Ayupov <aaupov@fb.com> | 2025-05-01 12:00:19 -0700 |
| commit | 6d495028dbe346f184ef7b9806889e8e5d190907 (patch) | |
| tree | 08bc62afc540994ad6fbc66d064629002ea8117c /bolt/lib/Profile/DataAggregator.cpp | |
| parent | 258e1438c23c20cfe5c4908633401ab0adf4a364 (diff) | |
[𝘀𝗽𝗿] changes to main this commit is based on users/aaupov/spr/main.boltnfci-simplify-dataaggregator-using-traces
Created using spr 1.3.4
[skip ci]
Diffstat (limited to 'bolt/lib/Profile/DataAggregator.cpp')
| -rw-r--r-- | bolt/lib/Profile/DataAggregator.cpp | 144 |
1 files changed, 36 insertions, 108 deletions
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a8a187974418..da260e09a9f8 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -499,18 +499,15 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { filterBinaryMMapInfo(); prepareToParse("events", MainEventsPPI, ErrorCallback); + if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents()) + errs() << "PERF2BOLT: failed to parse samples\n"; + if (opts::HeatmapMode) { - if (std::error_code EC = printLBRHeatMap()) { - errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; - exit(1); - } + if (std::error_code EC = printHeatMap()) + return errorCodeToError(EC); exit(0); } - if ((!opts::BasicAggregation && parseBranchEvents()) || - (opts::BasicAggregation && parseBasicEvents())) - errs() << "PERF2BOLT: failed to parse samples\n"; - // Special handling for memory events if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) return Error::success(); @@ -567,15 +564,14 @@ void DataAggregator::processProfile(BinaryContext &BC) { processMemEvents(); // Mark all functions with registered events as having a valid profile. - const auto Flags = opts::BasicAggregation ? 
BinaryFunction::PF_SAMPLE - : BinaryFunction::PF_LBR; for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - FuncBranchData *FBD = getBranchData(BF); - if (FBD || getFuncSampleData(BF.getNames())) { - BF.markProfiled(Flags); - if (FBD) - BF.RawBranchCount = FBD->getNumExecutedBranches(); + if (FuncBranchData *FBD = getBranchData(BF)) { + BF.markProfiled(BinaryFunction::PF_LBR); + BF.RawSampleCount = FBD->getNumExecutedBranches(); + } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) { + BF.markProfiled(BinaryFunction::PF_SAMPLE); + BF.RawSampleCount = FSD->getSamples(); } } @@ -632,10 +628,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func, bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, uint64_t Count) { + // To record executed bytes, use basic block size as is regardless of BAT. + uint64_t BlockSize = 0; + if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset( + Address - OrigFunc.getAddress())) + BlockSize = BB->getOriginalSize(); + BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc); BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; - if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress()))) + if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress()))) NumColdSamples += Count; + // Attach executed bytes to parent function in case of cold fragment. + Func.SampleCountInBytes += Count * BlockSize; auto I = NamesToSamples.find(Func.getOneName()); if (I == NamesToSamples.end()) { @@ -720,23 +724,6 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, : isReturn(Func.disassembleInstructionAtOffset(Offset)); }; - // Returns whether \p Offset in \p Func may be a call continuation excluding - // entry points and landing pads. - auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) { - // No call continuation at a function start. 
- if (!Offset) - return false; - - // FIXME: support BAT case where the function might be in empty state - // (split fragments declared non-simple). - if (!Func.hasCFG()) - return false; - - // The offset should not be an entry point or a landing pad. - const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); - return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad(); - }; - // Mutates \p Addr to an offset into the containing function, performing BAT // offset translation and parent lookup. // @@ -749,8 +736,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, Addr -= Func->getAddress(); - bool IsRetOrCallCont = - IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr); + bool IsRet = IsFrom && checkReturn(*Func, Addr); if (BAT) Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); @@ -761,24 +747,16 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, NumColdSamples += Count; if (!ParentFunc) - return std::pair{Func, IsRetOrCallCont}; + return std::pair{Func, IsRet}; - return std::pair{ParentFunc, IsRetOrCallCont}; + return std::pair{ParentFunc, IsRet}; }; - uint64_t ToOrig = To; auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); - auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false); + auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false); if (!FromFunc && !ToFunc) return false; - // Record call to continuation trace. - if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc && - (IsReturn || IsCallCont)) { - LBREntry First{ToOrig - 1, ToOrig - 1, false}; - LBREntry Second{ToOrig, ToOrig, false}; - return doTrace(First, Second, Count); - } // Ignore returns. 
if (IsReturn) return true; @@ -1235,21 +1213,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); if (std::error_code EC = TypeOrErr.getError()) return EC; - // Pre-aggregated profile with branches and fallthroughs needs to convert - // return profile into call to continuation fall-through. - auto Type = AggregatedLBREntry::BRANCH; - if (TypeOrErr.get() == "B") { - NeedsConvertRetProfileToCallCont = true; + auto Type = AggregatedLBREntry::TRACE; + if (LLVM_LIKELY(TypeOrErr.get() == "T")) { + } else if (TypeOrErr.get() == "B") { Type = AggregatedLBREntry::BRANCH; } else if (TypeOrErr.get() == "F") { - NeedsConvertRetProfileToCallCont = true; Type = AggregatedLBREntry::FT; } else if (TypeOrErr.get() == "f") { - NeedsConvertRetProfileToCallCont = true; Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - } else if (TypeOrErr.get() == "T") { - // Trace is expanded into B and [Ff] - Type = AggregatedLBREntry::TRACE; } else { reportError("expected T, B, F or f"); return make_error_code(llvm::errc::io_error); @@ -1323,7 +1294,7 @@ bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); } -std::error_code DataAggregator::printLBRHeatMap() { +std::error_code DataAggregator::printHeatMap() { outs() << "PERF2BOLT: parse branch events...\n"; NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); @@ -1334,53 +1305,6 @@ std::error_code DataAggregator::printLBRHeatMap() { } Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, opts::HeatmapMaxAddress, getTextSections(BC)); - uint64_t NumTotalSamples = 0; - - if (opts::BasicAggregation) { - while (hasData()) { - ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); - if (std::error_code EC = SampleRes.getError()) { - if (EC == errc::no_such_process) - continue; - return EC; - } - PerfBasicSample &Sample = SampleRes.get(); - 
HM.registerAddress(Sample.PC); - NumTotalSamples++; - } - outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; - } else { - while (hasData()) { - ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); - if (std::error_code EC = SampleRes.getError()) { - if (EC == errc::no_such_process) - continue; - return EC; - } - - PerfBranchSample &Sample = SampleRes.get(); - - // LBRs are stored in reverse execution order. NextLBR refers to the next - // executed branch record. - const LBREntry *NextLBR = nullptr; - for (const LBREntry &LBR : Sample.LBR) { - if (NextLBR) { - // Record fall-through trace. - const uint64_t TraceFrom = LBR.To; - const uint64_t TraceTo = NextLBR->From; - ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; - } - NextLBR = &LBR; - } - if (!Sample.LBR.empty()) { - HM.registerAddress(Sample.LBR.front().To); - HM.registerAddress(Sample.LBR.back().From); - } - NumTotalSamples += Sample.LBR.size(); - } - outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; - outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; - } if (!NumTotalSamples) { if (opts::BasicAggregation) { @@ -1396,10 +1320,14 @@ std::error_code DataAggregator::printLBRHeatMap() { outs() << "HEATMAP: building heat map...\n"; - for (const auto &LBR : FallthroughLBRs) { - const Trace &Trace = LBR.first; - const FTInfo &Info = LBR.second; - HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); + if (opts::BasicAggregation) { + for (const auto &[PC, Hits] : BasicSamples) + HM.registerAddress(PC, Hits); + } else { + for (const auto &[Trace, Info] : FallthroughLBRs) + HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); + for (const auto &[Trace, Info] : BranchLBRs) + HM.registerAddress(Trace.From, Info.TakenCount); } if (HM.getNumInvalidRanges()) |
