summaryrefslogtreecommitdiff
path: root/bolt/lib
diff options
context:
space:
mode:
authorMingming Liu <mingmingl@google.com>2025-09-10 15:25:31 -0700
committerGitHub <noreply@github.com>2025-09-10 15:25:31 -0700
commit1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree57f4b1f313c8cf74eed8819870f39c36ea263c68 /bolt/lib
parent898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parentb8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format [users/mingmingl-llvm/samplefdo-profile-format]
Diffstat (limited to 'bolt/lib')
-rw-r--r--bolt/lib/Core/BinaryBasicBlock.cpp13
-rw-r--r--bolt/lib/Core/BinaryContext.cpp24
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp6
-rw-r--r--bolt/lib/Passes/BinaryPasses.cpp12
-rw-r--r--bolt/lib/Passes/IndirectCallPromotion.cpp5
-rw-r--r--bolt/lib/Passes/PAuthGadgetScanner.cpp13
-rw-r--r--bolt/lib/Rewrite/BinaryPassManager.cpp4
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp3
-rw-r--r--bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp161
9 files changed, 201 insertions, 40 deletions
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index 311d5c15b8dc..eeab1ed4d7cf 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -103,9 +103,18 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
Valid &= (Sym == Function->getFunctionEndLabel() ||
Sym == Function->getFunctionEndLabel(getFragmentNum()));
if (!Valid) {
- BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
- << Sym->getName() << "\n";
+ const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Sym);
+ if (TargetBF) {
+ // It's possible for another function to be in the jump table entry
+ // as a result of built-in unreachable.
+ Valid = true;
+ } else {
+ BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
+ << Sym->getName() << "\n";
+ }
}
+ if (!Valid)
+ break;
}
}
} else {
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index da59a188c6b6..23a5a65c2c5f 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -207,7 +207,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
Twine("BOLT-ERROR: ", Error));
std::unique_ptr<const MCRegisterInfo> MRI(
- TheTarget->createMCRegInfo(TripleName));
+ TheTarget->createMCRegInfo(TheTriple));
if (!MRI)
return createStringError(
make_error_code(std::errc::not_supported),
@@ -215,7 +215,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
// Set up disassembler.
std::unique_ptr<MCAsmInfo> AsmInfo(
- TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+ TheTarget->createMCAsmInfo(*MRI, TheTriple, MCTargetOptions()));
if (!AsmInfo)
return createStringError(
make_error_code(std::errc::not_supported),
@@ -227,7 +227,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
AsmInfo->setAllowAtInName(true);
std::unique_ptr<const MCSubtargetInfo> STI(
- TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
+ TheTarget->createMCSubtargetInfo(TheTriple, "", FeaturesStr));
if (!STI)
return createStringError(
make_error_code(std::errc::not_supported),
@@ -1568,23 +1568,19 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
const DWARFDebugLine::LineTable *LineTable =
DwCtx->getLineTableForUnit(SrcUnit);
- const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
- LineTable->Prologue.FileNames;
- // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
+ const DWARFDebugLine::FileNameEntry &FileNameEntry =
+ LineTable->Prologue.getFileNameEntry(FileIndex);
+ // Dir indexes start at 1 and a dir index 0
// means empty dir.
- assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
- "FileIndex out of range for the compilation unit.");
StringRef Dir = "";
- if (FileNames[FileIndex - 1].DirIdx != 0) {
+ if (FileNameEntry.DirIdx != 0) {
if (std::optional<const char *> DirName = dwarf::toString(
- LineTable->Prologue
- .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
+ LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) {
Dir = *DirName;
}
}
StringRef FileName = "";
- if (std::optional<const char *> FName =
- dwarf::toString(FileNames[FileIndex - 1].Name))
+ if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name))
FileName = *FName;
assert(FileName != "");
DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
@@ -1925,7 +1921,7 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
StringRef FileName = "";
if (std::optional<const char *> FName =
- dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
+ dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name))
FileName = *FName;
OS << " # debug line " << FileName << ":" << Row.Line;
if (Row.Column)
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 8f494f105fbb..6cac2d0cca2c 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1959,7 +1959,9 @@ void BinaryFunction::postProcessJumpTables() {
return EntryAddress == Parent->getAddress() + Parent->getSize();
});
if (IsBuiltinUnreachable) {
- MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
+ BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress);
+ MCSymbol *Label = TargetBF ? TargetBF->getSymbol()
+ : getOrCreateLocalLabel(EntryAddress, true);
JT.Entries.push_back(Label);
continue;
}
@@ -3771,6 +3773,8 @@ MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
assert(Offset && "cannot add primary entry point");
const uint64_t EntryPointAddress = getAddress() + Offset;
+ assert(!isInConstantIsland(EntryPointAddress) &&
+ "cannot add entry point that points to constant data");
MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d7f02b947003..2f1bb21bc1fd 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1843,7 +1843,7 @@ Error StripRepRet::runOnFunctions(BinaryContext &BC) {
}
Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
- if (!BC.isX86())
+ if (!BC.isX86() && !BC.isAArch64())
return Error::success();
uint64_t NumInlined = 0;
@@ -1866,8 +1866,16 @@ Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
const bool IsMemcpy8 = (CalleeSymbol->getName() == "_memcpy8");
const bool IsTailCall = BC.MIB->isTailCall(Inst);
+ // Extract size from preceding instructions (AArch64 only).
+ // Pattern: MOV X2, #nb-bytes; BL memcpy (X0 = dest, X1 = src, X2 = size).
+ std::optional<uint64_t> KnownSize =
+ BC.MIB->findMemcpySizeInBytes(BB, II);
+
+ if (BC.isAArch64() && (!KnownSize.has_value() || *KnownSize > 64))
+ continue;
+
const InstructionListType NewCode =
- BC.MIB->createInlineMemcpy(IsMemcpy8);
+ BC.MIB->createInlineMemcpy(IsMemcpy8, KnownSize);
II = BB.replaceInstruction(II, NewCode);
std::advance(II, NewCode.size() - 1);
if (IsTailCall) {
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591f4c7a..8a01cb974c5d 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -261,10 +261,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
MCSymbol *Entry = JT->Entries[I];
const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Entry);
- assert(ToBB || Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()));
- if (Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()))
+ if (!ToBB)
continue;
const Location To(Entry);
const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f928dd49edb2..65c84ebc8c4f 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1078,6 +1078,15 @@ protected:
dbgs() << ")\n";
});
+ // If this instruction terminates the program immediately, no
+ // authentication oracles are possible past this point.
+ if (BC.MIB->isTrap(Point)) {
+ LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+ DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+ Next.CannotEscapeUnchecked.set();
+ return Next;
+ }
+
// If this instruction is reachable by the analysis, a non-empty state will
// be propagated to it sooner or later. Until then, skip computeNext().
if (Cur.empty()) {
@@ -1185,8 +1194,8 @@ protected:
//
// A basic block without any successors, on the other hand, can be
// pessimistically initialized to everything-is-unsafe: this will naturally
- // handle both return and tail call instructions and is harmless for
- // internal indirect branch instructions (such as computed gotos).
+ // handle return, trap and tail call instructions. At the same time, it is
+ // harmless for internal indirect branch instructions, like computed gotos.
if (BB.succ_empty())
return createUnsafeState();
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 0ddb73f82887..d9b7a2bd9a14 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -248,7 +248,9 @@ static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
static cl::opt<bool> StringOps(
"inline-memcpy",
- cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
+ cl::desc(
+ "inline memcpy using size-specific optimized instructions "
+ "(X86: 'rep movsb', AArch64: width-optimized register operations)"),
cl::cat(BoltOptCategory));
static cl::opt<bool> StripRepRet(
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 4f5a75b770ce..a6e4dbc9c192 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2935,7 +2935,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
ReferencedSymbol = nullptr;
ExtractedValue = Address;
} else if (RefFunctionOffset) {
- if (ContainingBF && ContainingBF != ReferencedBF) {
+ if (ContainingBF && ContainingBF != ReferencedBF &&
+ !ReferencedBF->isInConstantIsland(Address)) {
ReferencedSymbol =
ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
} else {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 973261765f95..f972646aa12e 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -382,10 +382,9 @@ public:
// the list of successors of this basic block as appropriate.
// Any of the above code sequences assume the fall-through basic block
- // is a dead-end BRK instruction (any immediate operand is accepted).
+ // is a dead-end trap instruction.
const BinaryBasicBlock *BreakBB = BB.getFallthrough();
- if (!BreakBB || BreakBB->empty() ||
- BreakBB->front().getOpcode() != AArch64::BRK)
+ if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
return std::nullopt;
// Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1744,6 +1743,34 @@ public:
Inst.addOperand(MCOperand::createImm(0));
}
+ /// Returns true if \p Inst is a BRK whose immediate is one of the values
+ /// treated here as "terminate the program immediately". Any other BRK
+ /// immediate, and any non-BRK instruction, yields false.
+ bool isTrap(const MCInst &Inst) const override {
+   const bool IsBreak = Inst.getOpcode() == AArch64::BRK;
+   if (!IsBreak)
+     return false;
+
+   // Only a handful of immediates are accepted: those likely to mean the BRK
+   // terminates the program rather than being handled by a SIGTRAP handler,
+   // for example.
+   const auto BrkImm = Inst.getOperand(0).getImm();
+
+   // 0xc470..0xc473: explicit Pointer Authentication check failed, see
+   // AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue().
+   const bool IsPtrAuthCheckFailure = BrkImm >= 0xc470 && BrkImm <= 0xc473;
+   // 0x1: __builtin_trap(), as emitted by Clang.
+   const bool IsClangBuiltinTrap = BrkImm == 0x1;
+   // 0x3e8 (decimal 1000): __builtin_trap(), as emitted by GCC.
+   const bool IsGCCBuiltinTrap = BrkImm == 0x3e8;
+
+   // Every other constant may indicate an intentionally recoverable
+   // break-point. This is the case at least for 0xf000, which is used by
+   // __builtin_debugtrap() supported by Clang.
+   return IsPtrAuthCheckFailure || IsClangBuiltinTrap || IsGCCBuiltinTrap;
+ }
+
+
bool isStorePair(const MCInst &Inst) const {
const unsigned opcode = Inst.getOpcode();
@@ -2490,26 +2517,18 @@ public:
createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
unsigned CodePointerSize) const override {
unsigned int I = 0;
- InstructionListType Instrs(IsLeaf ? 12 : 10);
+ InstructionListType Instrs(6);
- if (IsLeaf)
- createStackPointerIncrement(Instrs[I++], 128);
createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
- getSystemFlag(Instrs[I++], AArch64::X1);
InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0);
assert(Addr.size() == 2 && "Invalid Addr size");
std::copy(Addr.begin(), Addr.end(), Instrs.begin() + I);
I += Addr.size();
- storeReg(Instrs[I++], AArch64::X2, AArch64::SP);
- InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X2);
+ InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X1);
assert(Insts.size() == 2 && "Invalid Insts size");
std::copy(Insts.begin(), Insts.end(), Instrs.begin() + I);
I += Insts.size();
- loadReg(Instrs[I++], AArch64::X2, AArch64::SP);
- setSystemFlag(Instrs[I++], AArch64::X1);
createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
- if (IsLeaf)
- createStackPointerDecrement(Instrs[I++], 128);
return Instrs;
}
@@ -2597,6 +2616,122 @@ public:
getInstructionSize(const MCInst &Inst) const override {
return 4;
}
+
+ /// If \p Inst is an unshifted MOVZ (X or W form) whose destination register
+ /// aliases \p TargetReg, returns its immediate operand; otherwise returns
+ /// std::nullopt.
+ ///
+ /// A MOVZ whose immediate operand is not a plain immediate (e.g. a symbolic
+ /// expression) is rejected rather than queried with getImm().
+ std::optional<uint64_t>
+ extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
+   const unsigned Opcode = Inst.getOpcode();
+   if (Opcode != AArch64::MOVZXi && Opcode != AArch64::MOVZWi)
+     return std::nullopt;
+
+   // Operand layout used below: 0 = destination, 1 = immediate, 2 = shift.
+   const MCOperand &ImmOp = Inst.getOperand(1);
+   const MCOperand &ShiftOp = Inst.getOperand(2);
+   if (!ImmOp.isImm() || !ShiftOp.isImm() || ShiftOp.getImm() != 0)
+     return std::nullopt;
+
+   if (!getAliases(TargetReg)[Inst.getOperand(0).getReg()])
+     return std::nullopt;
+
+   return ImmOp.getImm();
+ }
+
+
+ /// Tries to determine the constant size argument of the call at \p CallInst
+ /// by inspecting the instructions of \p BB that precede it.
+ ///
+ /// The size is expected in the third integer argument register
+ /// (getIntArgRegister(2)). The instructions before the call are visited from
+ /// the call upwards, and the first one found that writes that register (or
+ /// any alias of it) decides the result: the value reported by
+ /// extractMoveImmediate() for it, which is std::nullopt when the write is not
+ /// a recognised move-immediate.
+ ///
+ /// Scanning upwards matters: an earlier write to the size register may be
+ /// overwritten before the call, so only the closest write describes the
+ /// value the callee actually receives.
+ ///
+ /// \returns the size in bytes, or std::nullopt if there is no size register,
+ /// no write to it in the block before the call, or the closest write is not
+ /// a recognised move-immediate.
+ std::optional<uint64_t>
+ findMemcpySizeInBytes(const BinaryBasicBlock &BB,
+                       BinaryBasicBlock::iterator CallInst) const override {
+   MCPhysReg SizeReg = getIntArgRegister(2);
+   if (SizeReg == getNoRegister())
+     return std::nullopt;
+
+   BitVector WrittenRegs(RegInfo->getNumRegs());
+   const BitVector &SizeRegAliases = getAliases(SizeReg);
+
+   // Walk backwards from the instruction just before the call to the start
+   // of the block.
+   for (auto InstIt = CallInst; InstIt != BB.begin();) {
+     --InstIt;
+     const MCInst &Inst = *InstIt;
+     WrittenRegs.reset();
+     getWrittenRegs(Inst, WrittenRegs);
+
+     // The closest definition of the size register is the only one that
+     // counts; stop here whether or not it is a constant move.
+     if (WrittenRegs.anyCommon(SizeRegAliases))
+       return extractMoveImmediate(Inst, SizeReg);
+   }
+   return std::nullopt;
+ }
+
+
+ /// Builds the instruction sequence that replaces a memcpy call whose size is
+ /// known. \p KnownSize must hold a value (asserted). When \p ReturnEnd is
+ /// set (the _memcpy8 flavour), an extra ADD makes X0 hold dest+size instead
+ /// of dest.
+ InstructionListType
+ createInlineMemcpy(bool ReturnEnd,
+                    std::optional<uint64_t> KnownSize) const override {
+   assert(KnownSize.has_value() &&
+          "AArch64 memcpy inlining requires known size");
+   const uint64_t NumBytes = *KnownSize;
+
+   InstructionListType Seq;
+   generateSizeSpecificMemcpy(Seq, NumBytes);
+
+   if (ReturnEnd) {
+     // X0 = X0 + NumBytes, with no shift applied to the immediate.
+     MCInstBuilder AdjustDest(AArch64::ADDXri);
+     AdjustDest.addReg(AArch64::X0);
+     AdjustDest.addReg(AArch64::X0);
+     AdjustDest.addImm(NumBytes);
+     AdjustDest.addImm(0);
+     Seq.emplace_back(AdjustDest);
+   }
+   return Seq;
+ }
+
+
+ /// Appends to \p Code a sequence of load/store pairs that copies \p Size
+ /// bytes from the address in X1 to the address in X0, and returns a copy of
+ /// the resulting list.
+ ///
+ /// Temporaries used are W9/X9 for 1/2/4/8-byte accesses and Q16 (plus Q17
+ /// for the 32-byte case) for 16-byte accesses. \p Size is expected to be at
+ /// most 64 (asserted); a \p Size of 0 appends nothing.
+ InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
+                                                uint64_t Size) const {
+   // Emits "load Reg from [X1, Imm]" followed by "store Reg to [X0, Imm]".
+   auto EmitCopy = [&Code](unsigned LoadOpc, unsigned StoreOpc, unsigned Reg,
+                           unsigned Imm) {
+     Code.emplace_back(
+         MCInstBuilder(LoadOpc).addReg(Reg).addReg(AArch64::X1).addImm(Imm));
+     Code.emplace_back(
+         MCInstBuilder(StoreOpc).addReg(Reg).addReg(AArch64::X0).addImm(Imm));
+   };
+
+   // 32 bytes is the one size with a dedicated sequence: it uses two distinct
+   // vector temporaries instead of reusing Q16.
+   if (Size == 32) {
+     EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q16, 0);
+     EmitCopy(AArch64::LDRQui, AArch64::STRQui, AArch64::Q17, 1);
+     return Code;
+   }
+
+   // Caller should have already filtered out sizes > 64 bytes.
+   assert(Size <= 64 &&
+          "Size should be <= 64 bytes for AArch64 memcpy inlining");
+
+   // Access widths from widest to narrowest, each with its opcodes and the
+   // temporary register it goes through.
+   struct CopyStep {
+     uint64_t Bytes;
+     unsigned LoadOpc;
+     unsigned StoreOpc;
+     unsigned Reg;
+   };
+   const CopyStep Steps[] = {
+       {16, AArch64::LDRQui, AArch64::STRQui, AArch64::Q16},
+       {8, AArch64::LDRXui, AArch64::STRXui, AArch64::X9},
+       {4, AArch64::LDRWui, AArch64::STRWui, AArch64::W9},
+       {2, AArch64::LDRHHui, AArch64::STRHHui, AArch64::W9},
+       {1, AArch64::LDRBBui, AArch64::STRBBui, AArch64::W9}};
+
+   // Greedily use the widest access that still fits in the bytes left. The
+   // immediate passed on is the byte offset divided by the access width.
+   uint64_t BytesDone = 0;
+   for (const CopyStep &Step : Steps)
+     for (; Size - BytesDone >= Step.Bytes; BytesDone += Step.Bytes)
+       EmitCopy(Step.LoadOpc, Step.StoreOpc, Step.Reg, BytesDone / Step.Bytes);
+
+   return Code;
+ }
};
} // end anonymous namespace