summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2024-11-01 13:30:36 -0700
committerAmir Ayupov <aaupov@fb.com>2024-11-01 13:30:36 -0700
commita8c1697883f03787ff50fc605013b034a15de00c (patch)
tree1a58ab0a903d637bad3e2f38d599c696ab4414b5
parent4b4ea6d84bddb5e7b3f144d18630390755e8f7b6 (diff)
parente1749c2b6c310814a0fe23fde3da59aa40407132 (diff)
Created using spr 1.3.4
-rw-r--r--bolt/docs/BAT.md12
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h4
-rw-r--r--bolt/include/bolt/Profile/BoltAddressTranslation.h3
-rw-r--r--bolt/include/bolt/Profile/DataAggregator.h3
-rw-r--r--bolt/lib/Profile/BoltAddressTranslation.cpp70
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp111
-rw-r--r--bolt/test/X86/callcont-fallthru.s138
7 files changed, 273 insertions, 68 deletions
diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md
index 817ad288aa34..3b42c36541ac 100644
--- a/bolt/docs/BAT.md
+++ b/bolt/docs/BAT.md
@@ -54,7 +54,7 @@ Functions table:
| table |
| |
| Secondary entry |
-| points |
+| points and LPs |
|------------------|
```
@@ -80,7 +80,7 @@ Hot indices are delta encoded, implicitly starting at zero.
| `HotIndex` | Delta, ULEB128 | Index of corresponding hot function in hot functions table | Cold |
| `FuncHash` | 8b | Function hash for input function | Hot |
| `NumBlocks` | ULEB128 | Number of basic blocks in the original function | Hot |
-| `NumSecEntryPoints` | ULEB128 | Number of secondary entry points in the original function | Hot |
+| `NumSecEntryPoints` | ULEB128 | Number of secondary entry points and landing pads in the original function | Hot |
| `ColdInputSkew` | ULEB128 | Skew to apply to all input offsets | Cold |
| `NumEntries` | ULEB128 | Number of address translation entries for a function | Both |
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Both |
@@ -116,7 +116,11 @@ input basic block mapping.
### Secondary Entry Points table
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
-offsets denoting secondary entry points, delta encoded, implicitly starting at zero.
+offsets denoting secondary entry points and landing pads, delta encoded,
+implicitly starting at zero.
| Entry | Encoding | Description |
| ----- | -------- | ----------- |
-| `SecEntryPoint` | Delta, ULEB128 | Secondary entry point offset |
+| `SecEntryPoint` | Delta, ULEB128 | Secondary entry point offset with `LPENTRY` LSB bit |
+
+`LPENTRY` bit denotes whether a given offset is a landing pad block. If not set,
+the offset is a secondary entry point.
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6ebbaf94754e..3ce3be0a7080 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -906,6 +906,10 @@ public:
return BB && BB->getOffset() == Offset ? BB : nullptr;
}
+ const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const {
+ return const_cast<BinaryFunction *>(this)->getBasicBlockAtOffset(Offset);
+ }
+
/// Retrieve the landing pad BB associated with invoke instruction \p Invoke
/// that is in \p BB. Return nullptr if none exists
BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB,
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index 65b9ba874368..62367ca3aebd 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -181,6 +181,9 @@ private:
/// translation map entry
const static uint32_t BRANCHENTRY = 0x1;
+ /// Identifies a landing pad in secondary entry point map entry.
+ const static uint32_t LPENTRY = 0x1;
+
public:
/// Map basic block input offset to a basic block index and hash pair.
class BBHashMapTy {
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b3070ceb..2880bfd03be7 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -266,7 +266,8 @@ private:
uint64_t Mispreds);
/// Register a \p Branch.
- bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
+ bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds,
+ bool IsPreagg);
/// Register a trace between two LBR entries supplied in execution order.
bool doTrace(const LBREntry &First, const LBREntry &Second,
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index ec7e303c0f52..9ce62052653e 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -86,21 +86,16 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
continue;
- uint32_t NumSecondaryEntryPoints = 0;
- Function.forEachEntryPoint([&](uint64_t Offset, const MCSymbol *) {
- if (!Offset)
- return true;
- ++NumSecondaryEntryPoints;
- SecondaryEntryPointsMap[OutputAddress].push_back(Offset);
- return true;
- });
-
LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
LLVM_DEBUG(dbgs() << " Address reference: 0x"
<< Twine::utohexstr(Function.getOutputAddress()) << "\n");
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(InputAddress)));
- LLVM_DEBUG(dbgs() << " Secondary Entry Points: " << NumSecondaryEntryPoints
- << '\n');
+ LLVM_DEBUG({
+ uint32_t NumSecondaryEntryPoints = 0;
+ if (SecondaryEntryPointsMap.count(InputAddress))
+ NumSecondaryEntryPoints = SecondaryEntryPointsMap[InputAddress].size();
+ dbgs() << " Secondary Entry Points: " << NumSecondaryEntryPoints << '\n';
+ });
MapTy Map;
for (const BinaryBasicBlock *const BB :
@@ -207,10 +202,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
<< Twine::utohexstr(Address) << ".\n");
encodeULEB128(Address - PrevAddress, OS);
PrevAddress = Address;
- const uint32_t NumSecondaryEntryPoints =
- SecondaryEntryPointsMap.count(Address)
- ? SecondaryEntryPointsMap[Address].size()
- : 0;
+ uint32_t NumSecondaryEntryPoints = 0;
+ if (SecondaryEntryPointsMap.count(HotInputAddress))
+ NumSecondaryEntryPoints = SecondaryEntryPointsMap[HotInputAddress].size();
uint32_t Skew = 0;
if (Cold) {
auto HotEntryIt = Maps.find(ColdPartSource[Address]);
@@ -281,7 +275,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
if (!Cold && NumSecondaryEntryPoints) {
LLVM_DEBUG(dbgs() << "Secondary entry points: ");
// Secondary entry point offsets, delta-encoded
- for (uint32_t Offset : SecondaryEntryPointsMap[Address]) {
+ for (uint32_t Offset : SecondaryEntryPointsMap[HotInputAddress]) {
encodeULEB128(Offset - PrevOffset, OS);
LLVM_DEBUG(dbgs() << formatv("{0:x} ", Offset));
PrevOffset = Offset;
@@ -471,8 +465,12 @@ void BoltAddressTranslation::dump(raw_ostream &OS) const {
const std::vector<uint32_t> &SecondaryEntryPoints =
SecondaryEntryPointsIt->second;
OS << SecondaryEntryPoints.size() << " secondary entry points:\n";
- for (uint32_t EntryPointOffset : SecondaryEntryPoints)
- OS << formatv("{0:x}\n", EntryPointOffset);
+ for (uint32_t EntryPointOffset : SecondaryEntryPoints) {
+ OS << formatv("{0:x}", EntryPointOffset >> 1);
+ if (EntryPointOffset & LPENTRY)
+ OS << " (lp)";
+ OS << '\n';
+ }
}
OS << "\n";
}
@@ -584,14 +582,21 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
// changed
if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
continue;
+ const uint64_t FuncAddress = BF.getAddress();
// Prepare function and block hashes
- FuncHashes.addEntry(BF.getAddress(), BF.computeHash());
+ FuncHashes.addEntry(FuncAddress, BF.computeHash());
BF.computeBlockHashes();
- BBHashMapTy &BBHashMap = getBBHashMap(BF.getAddress());
+ BBHashMapTy &BBHashMap = getBBHashMap(FuncAddress);
+ std::vector<uint32_t> SecondaryEntryPoints;
// Set BF/BB metadata
- for (const BinaryBasicBlock &BB : BF)
+ for (const BinaryBasicBlock &BB : BF) {
BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
- NumBasicBlocksMap.emplace(BF.getAddress(), BF.size());
+ bool IsLandingPad = BB.isLandingPad();
+ if (IsLandingPad || BF.getSecondaryEntryPointSymbol(BB))
+ SecondaryEntryPoints.emplace_back(BB.getOffset() << 1 | IsLandingPad);
+ }
+ SecondaryEntryPointsMap.emplace(FuncAddress, SecondaryEntryPoints);
+ NumBasicBlocksMap.emplace(FuncAddress, BF.size());
}
}
@@ -601,13 +606,20 @@ BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address,
auto FunctionIt = SecondaryEntryPointsMap.find(Address);
if (FunctionIt == SecondaryEntryPointsMap.end())
return 0;
- const std::vector<uint32_t> &Offsets = FunctionIt->second;
- auto OffsetIt = std::find(Offsets.begin(), Offsets.end(), Offset);
- if (OffsetIt == Offsets.end())
- return 0;
- // Adding one here because main entry point is not stored in BAT, and
- // enumeration for secondary entry points starts with 1.
- return OffsetIt - Offsets.begin() + 1;
+ unsigned EntryPoints = 0;
+ // Note we need to scan the vector to get the entry point id because it
+ // contains both entry points and landing pads.
+ for (uint32_t Off : FunctionIt->second) {
+ // Skip landing pads.
+ if (Off & LPENTRY)
+ continue;
+ // Adding one here because main entry point is not stored in BAT, and
+ // enumeration for secondary entry points starts with 1.
+ if (Off >> 1 == Offset)
+ return EntryPoints + 1;
+ ++EntryPoints;
+ }
+ return 0;
}
std::pair<const BinaryFunction *, unsigned>
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index fcde6f5f4642..6e6f528d58f4 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -774,42 +774,75 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
- uint64_t Mispreds) {
- bool IsReturn = false;
- auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
- if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
- Addr -= Func->getAddress();
- if (IsFrom) {
- auto checkReturn = [&](auto MaybeInst) {
- IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
- };
- if (Func->hasInstructions())
- checkReturn(Func->getInstructionAtOffset(Addr));
- else
- checkReturn(Func->disassembleInstructionAtOffset(Addr));
- }
+ uint64_t Mispreds, bool IsPreagg) {
+ // Returns whether \p Offset in \p Func contains a return instruction.
+ auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
+ auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
+ return Func.hasInstructions()
+ ? isReturn(Func.getInstructionAtOffset(Offset))
+ : isReturn(Func.disassembleInstructionAtOffset(Offset));
+ };
- if (BAT)
- Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+ // Returns whether \p Offset in \p Func may be a call continuation excluding
+ // entry points and landing pads.
+ auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
+ // No call continuation at a function start.
+ if (!Offset)
+ return false;
+
+ // FIXME: support BAT case where the function might be in empty state
+ // (split fragments declared non-simple).
+ if (!Func.hasCFG())
+ return false;
+
+ // The offset should not be an entry point or a landing pad.
+ const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
+ return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
+ };
- if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
- Func = ParentFunc;
- if (IsFrom)
- NumColdSamples += Count;
- }
+ // Mutates \p Addr to an offset into the containing function, performing BAT
+ // offset translation and parent lookup.
+ //
+ // Returns the containing function (or BAT parent) and whether the address
+ // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
+ auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
+ BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
+ if (!Func)
+ return std::pair{Func, false};
- return Func;
- }
- return nullptr;
+ Addr -= Func->getAddress();
+
+ bool IsRetOrCallCont =
+ IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
+
+ if (BAT)
+ Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+
+ BinaryFunction *ParentFunc = getBATParentFunction(*Func);
+ if (!ParentFunc)
+ return std::pair{Func, IsRetOrCallCont};
+
+ if (IsFrom)
+ NumColdSamples += Count;
+
+ return std::pair{ParentFunc, IsRetOrCallCont};
};
- BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
+ uint64_t ToOrig = To;
+ auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom=*/true);
+ auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom=*/false);
+ if (!FromFunc && !ToFunc)
+ return false;
+
+ // Record call to continuation trace.
+ if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
+ LBREntry First{ToOrig - 1, ToOrig - 1, false};
+ LBREntry Second{ToOrig, ToOrig, false};
+ return doTrace(First, Second, Count);
+ }
// Ignore returns.
if (IsReturn)
return true;
- BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
- if (!FromFunc && !ToFunc)
- return false;
// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) {
@@ -826,10 +859,19 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
LLVM_DEBUG({
- dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
- << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
- << " and ending in " << ToFunc->getPrintName()
- << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
+ dbgs() << "Out of range trace starting in ";
+ if (FromFunc)
+ dbgs() << formatv("{0} @ {1:x}", *FromFunc,
+ First.To - FromFunc->getAddress());
+ else
+ dbgs() << Twine::utohexstr(First.To);
+ dbgs() << " and ending in ";
+ if (ToFunc)
+ dbgs() << formatv("{0} @ {1:x}", *ToFunc,
+ Second.From - ToFunc->getAddress());
+ else
+ dbgs() << Twine::utohexstr(Second.From);
+ dbgs() << '\n';
});
NumLongRangeTraces += Count;
return false;
@@ -1611,7 +1653,8 @@ void DataAggregator::processBranchEvents() {
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const TakenBranchInfo &Info = AggrLBR.second;
- doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
+ doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
+ /*IsPreagg*/ false);
}
}
@@ -1772,7 +1815,7 @@ void DataAggregator::processPreAggregated() {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
- AggrEntry.Mispreds);
+ AggrEntry.Mispreds, /*IsPreagg=*/true);
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
new file mode 100644
index 000000000000..641beb79ecf2
--- /dev/null
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -0,0 +1,138 @@
+## Ensures that a call continuation fallthrough count is set when using
+## pre-aggregated perf data.
+
+# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib
+# RUN: link_fdata %s %t %t.pa1 PREAGG
+# RUN: link_fdata %s %t %t.pa2 PREAGG2
+# RUN: link_fdata %s %t %t.pa3 PREAGG3
+# RUN: link_fdata %s %t %t.pa4 PREAGG4
+
+## Check normal case: fallthrough is not LP or secondary entry.
+# RUN: llvm-strip --strip-unneeded %t -o %t.exe
+# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \
+# RUN: --print-cfg --print-only=main | FileCheck %s
+
+## Check that getFallthroughsInTrace correctly handles a trace starting at plt
+## call continuation
+# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \
+# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2
+
+## Check that we don't treat secondary entry points as call continuation sites.
+# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \
+# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
+
+## Check fallthrough to a landing pad case.
+# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out --enable-bat \
+# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4
+
+## Check that a landing pad is emitted in BAT
+# RUN: llvm-bat-dump %t.out --dump-all | FileCheck %s --check-prefix=CHECK-BAT
+
+# CHECK-BAT: 1 secondary entry points:
+# CHECK-BAT-NEXT: 0x38 (lp)
+
+ .globl foo
+ .type foo, %function
+foo:
+ pushq %rbp
+ movq %rsp, %rbp
+ popq %rbp
+Lfoo_ret:
+ retq
+.size foo, .-foo
+
+ .globl main
+ .type main, %function
+main:
+.Lfunc_begin0:
+ .cfi_startproc
+ .cfi_personality 155, DW.ref.__gxx_personality_v0
+ .cfi_lsda 27, .Lexception0
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x20, %rsp
+ movl $0x0, -0x4(%rbp)
+ movl %edi, -0x8(%rbp)
+ movq %rsi, -0x10(%rbp)
+ callq puts@PLT
+## Target is a call continuation
+# PREAGG: B X:0 #Ltmp1# 2 0
+# CHECK: callq puts@PLT
+# CHECK-NEXT: count: 2
+
+Ltmp1:
+ movq -0x10(%rbp), %rax
+ movq 0x8(%rax), %rdi
+ movl %eax, -0x14(%rbp)
+
+Ltmp4:
+ cmpl $0x0, -0x14(%rbp)
+ je Ltmp0
+# CHECK2: je .Ltmp0
+# CHECK2-NEXT: count: 3
+
+ movl $0xa, -0x18(%rbp)
+ callq foo
+## Target is a call continuation
+# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
+# CHECK: callq foo
+# CHECK-NEXT: count: 1
+
+## PLT call continuation fallthrough spanning the call
+# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
+# CHECK2: callq foo
+# CHECK2-NEXT: count: 3
+
+## Target is a secondary entry point
+# PREAGG3: B X:0 #Ltmp3# 2 0
+# CHECK3: callq foo
+# CHECK3-NEXT: count: 0
+
+## Target is a landing pad
+# PREAGG4: B X:0 #Ltmp3# 2 0
+# CHECK4: callq puts@PLT
+# CHECK4-NEXT: count: 0
+
+Ltmp3:
+ cmpl $0x0, -0x18(%rbp)
+Ltmp3_br:
+ jmp Ltmp2
+
+Ltmp2:
+ movl -0x18(%rbp), %eax
+ addl $-0x1, %eax
+ movl %eax, -0x18(%rbp)
+ jmp Ltmp3
+ jmp Ltmp4
+ jmp Ltmp1
+
+Ltmp0:
+ xorl %eax, %eax
+ addq $0x20, %rsp
+ popq %rbp
+ retq
+.Lfunc_end0:
+ .cfi_endproc
+.size main, .-main
+
+ .section .gcc_except_table,"a",@progbits
+ .p2align 2, 0x0
+GCC_except_table0:
+.Lexception0:
+ .byte 255 # @LPStart Encoding = omit
+ .byte 255 # @TType Encoding = omit
+ .byte 1 # Call site Encoding = uleb128
+ .uleb128 .Lcst_end0-.Lcst_begin0
+.Lcst_begin0:
+ .uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
+ .uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0
+ .uleb128 Ltmp3-.Lfunc_begin0 # jumps to Ltmp3
+ .byte 0 # has no landing pad
+ .byte 0 # On action: cleanup
+.Lcst_end0:
+ .p2align 2, 0x0
+ .hidden DW.ref.__gxx_personality_v0
+ .weak DW.ref.__gxx_personality_v0
+ .section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
+ .p2align 3, 0x0
+ .type DW.ref.__gxx_personality_v0,@object