diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /bolt/lib | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'bolt/lib')
| -rw-r--r-- | bolt/lib/Core/BinaryBasicBlock.cpp | 13 | ||||
| -rw-r--r-- | bolt/lib/Core/BinaryContext.cpp | 24 | ||||
| -rw-r--r-- | bolt/lib/Core/BinaryFunction.cpp | 6 | ||||
| -rw-r--r-- | bolt/lib/Passes/BinaryPasses.cpp | 12 | ||||
| -rw-r--r-- | bolt/lib/Passes/IndirectCallPromotion.cpp | 5 | ||||
| -rw-r--r-- | bolt/lib/Passes/PAuthGadgetScanner.cpp | 13 | ||||
| -rw-r--r-- | bolt/lib/Rewrite/BinaryPassManager.cpp | 4 | ||||
| -rw-r--r-- | bolt/lib/Rewrite/RewriteInstance.cpp | 3 | ||||
| -rw-r--r-- | bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 161 |
9 files changed, 201 insertions, 40 deletions
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index 311d5c15b8dc..eeab1ed4d7cf 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -103,9 +103,18 @@ bool BinaryBasicBlock::validateSuccessorInvariants() { Valid &= (Sym == Function->getFunctionEndLabel() || Sym == Function->getFunctionEndLabel(getFragmentNum())); if (!Valid) { - BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: " - << Sym->getName() << "\n"; + const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Sym); + if (TargetBF) { + // It's possible for another function to be in the jump table entry + // as a result of built-in unreachable. + Valid = true; + } else { + BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: " + << Sym->getName() << "\n"; + } } + if (!Valid) + break; } } } else { diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index da59a188c6b6..23a5a65c2c5f 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -207,7 +207,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( Twine("BOLT-ERROR: ", Error)); std::unique_ptr<const MCRegisterInfo> MRI( - TheTarget->createMCRegInfo(TripleName)); + TheTarget->createMCRegInfo(TheTriple)); if (!MRI) return createStringError( make_error_code(std::errc::not_supported), @@ -215,7 +215,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( // Set up disassembler. 
std::unique_ptr<MCAsmInfo> AsmInfo( - TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + TheTarget->createMCAsmInfo(*MRI, TheTriple, MCTargetOptions())); if (!AsmInfo) return createStringError( make_error_code(std::errc::not_supported), @@ -227,7 +227,7 @@ Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( AsmInfo->setAllowAtInName(true); std::unique_ptr<const MCSubtargetInfo> STI( - TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); + TheTarget->createMCSubtargetInfo(TheTriple, "", FeaturesStr)); if (!STI) return createStringError( make_error_code(std::errc::not_supported), @@ -1568,23 +1568,19 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(SrcUnit); - const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = - LineTable->Prologue.FileNames; - // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 + const DWARFDebugLine::FileNameEntry &FileNameEntry = + LineTable->Prologue.getFileNameEntry(FileIndex); + // Dir indexes start at 1 and a dir index 0 // means empty dir. 
- assert(FileIndex > 0 && FileIndex <= FileNames.size() && - "FileIndex out of range for the compilation unit."); StringRef Dir = ""; - if (FileNames[FileIndex - 1].DirIdx != 0) { + if (FileNameEntry.DirIdx != 0) { if (std::optional<const char *> DirName = dwarf::toString( - LineTable->Prologue - .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { + LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) { Dir = *DirName; } } StringRef FileName = ""; - if (std::optional<const char *> FName = - dwarf::toString(FileNames[FileIndex - 1].Name)) + if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name)) FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); @@ -1925,7 +1921,7 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; if (std::optional<const char *> FName = - dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) + dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; OS << " # debug line " << FileName << ":" << Row.Line; if (Row.Column) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 8f494f105fbb..6cac2d0cca2c 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1959,7 +1959,9 @@ void BinaryFunction::postProcessJumpTables() { return EntryAddress == Parent->getAddress() + Parent->getSize(); }); if (IsBuiltinUnreachable) { - MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true); + BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress); + MCSymbol *Label = TargetBF ? 
TargetBF->getSymbol() + : getOrCreateLocalLabel(EntryAddress, true); JT.Entries.push_back(Label); continue; } @@ -3771,6 +3773,8 @@ MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) { assert(Offset && "cannot add primary entry point"); const uint64_t EntryPointAddress = getAddress() + Offset; + assert(!isInConstantIsland(EntryPointAddress) && + "cannot add entry point that points to constant data"); MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress); MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol); diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index d7f02b947003..2f1bb21bc1fd 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1843,7 +1843,7 @@ Error StripRepRet::runOnFunctions(BinaryContext &BC) { } Error InlineMemcpy::runOnFunctions(BinaryContext &BC) { - if (!BC.isX86()) + if (!BC.isX86() && !BC.isAArch64()) return Error::success(); uint64_t NumInlined = 0; @@ -1866,8 +1866,16 @@ Error InlineMemcpy::runOnFunctions(BinaryContext &BC) { const bool IsMemcpy8 = (CalleeSymbol->getName() == "_memcpy8"); const bool IsTailCall = BC.MIB->isTailCall(Inst); + // Extract size from preceding instructions (AArch64 only). + // Pattern: MOV X2, #nb-bytes; BL memcpy src, dest, X2. 
+ std::optional<uint64_t> KnownSize = + BC.MIB->findMemcpySizeInBytes(BB, II); + + if (BC.isAArch64() && (!KnownSize.has_value() || *KnownSize > 64)) + continue; + const InstructionListType NewCode = - BC.MIB->createInlineMemcpy(IsMemcpy8); + BC.MIB->createInlineMemcpy(IsMemcpy8, KnownSize); II = BB.replaceInstruction(II, NewCode); std::advance(II, NewCode.size() - 1); if (IsTailCall) { diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 2b5a591f4c7a..8a01cb974c5d 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -261,10 +261,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) { MCSymbol *Entry = JT->Entries[I]; const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Entry); - assert(ToBB || Entry == BF.getFunctionEndLabel() || - Entry == BF.getFunctionEndLabel(FragmentNum::cold())); - if (Entry == BF.getFunctionEndLabel() || - Entry == BF.getFunctionEndLabel(FragmentNum::cold())) + if (!ToBB) continue; const Location To(Entry); const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB); diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index f928dd49edb2..65c84ebc8c4f 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -1078,6 +1078,15 @@ protected: dbgs() << ")\n"; }); + // If this instruction terminates the program immediately, no + // authentication oracles are possible past this point. + if (BC.MIB->isTrap(Point)) { + LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); }); + DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); + Next.CannotEscapeUnchecked.set(); + return Next; + } + // If this instruction is reachable by the analysis, a non-empty state will // be propagated to it sooner or later. Until then, skip computeNext(). 
if (Cur.empty()) { @@ -1185,8 +1194,8 @@ protected: // // A basic block without any successors, on the other hand, can be // pessimistically initialized to everything-is-unsafe: this will naturally - // handle both return and tail call instructions and is harmless for - // internal indirect branch instructions (such as computed gotos). + // handle return, trap and tail call instructions. At the same time, it is + // harmless for internal indirect branch instructions, like computed gotos. if (BB.succ_empty()) return createUnsafeState(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 0ddb73f82887..d9b7a2bd9a14 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -248,7 +248,9 @@ static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), static cl::opt<bool> StringOps( "inline-memcpy", - cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), + cl::desc( + "inline memcpy using size-specific optimized instructions " + "(X86: 'rep movsb', AArch64: width-optimized register operations)"), cl::cat(BoltOptCategory)); static cl::opt<bool> StripRepRet( diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 4f5a75b770ce..a6e4dbc9c192 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2935,7 +2935,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, ReferencedSymbol = nullptr; ExtractedValue = Address; } else if (RefFunctionOffset) { - if (ContainingBF && ContainingBF != ReferencedBF) { + if (ContainingBF && ContainingBF != ReferencedBF && + !ReferencedBF->isInConstantIsland(Address)) { ReferencedSymbol = ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); } else { diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 973261765f95..f972646aa12e 100644 --- 
a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -382,10 +382,9 @@ public: // the list of successors of this basic block as appropriate. // Any of the above code sequences assume the fall-through basic block - // is a dead-end BRK instruction (any immediate operand is accepted). + // is a dead-end trap instruction. const BinaryBasicBlock *BreakBB = BB.getFallthrough(); - if (!BreakBB || BreakBB->empty() || - BreakBB->front().getOpcode() != AArch64::BRK) + if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front())) return std::nullopt; // Iterate over the instructions of BB in reverse order, matching opcodes @@ -1744,6 +1743,34 @@ public: Inst.addOperand(MCOperand::createImm(0)); } + bool isTrap(const MCInst &Inst) const override { + if (Inst.getOpcode() != AArch64::BRK) + return false; + // Only match the immediate values that are likely to indicate this BRK + // instruction is emitted to terminate the program immediately and not to + // be handled by a SIGTRAP handler, for example. + switch (Inst.getOperand(0).getImm()) { + case 0xc470: + case 0xc471: + case 0xc472: + case 0xc473: + // Explicit Pointer Authentication check failed, see + // AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(). + return true; + case 0x1: + // __builtin_trap(), as emitted by Clang. + return true; + case 0x3e8: // decimal 1000 + // __builtin_trap(), as emitted by GCC. + return true; + default: + // Some constants may indicate intentionally recoverable break-points. + // This is the case at least for 0xf000, which is used by + // __builtin_debugtrap() supported by Clang. + return false; + } + } + bool isStorePair(const MCInst &Inst) const { const unsigned opcode = Inst.getOpcode(); @@ -2490,26 +2517,18 @@ public: createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf, unsigned CodePointerSize) const override { unsigned int I = 0; - InstructionListType Instrs(IsLeaf ? 
12 : 10); + InstructionListType Instrs(6); - if (IsLeaf) - createStackPointerIncrement(Instrs[I++], 128); createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1); - getSystemFlag(Instrs[I++], AArch64::X1); InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0); assert(Addr.size() == 2 && "Invalid Addr size"); std::copy(Addr.begin(), Addr.end(), Instrs.begin() + I); I += Addr.size(); - storeReg(Instrs[I++], AArch64::X2, AArch64::SP); - InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X2); + InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X1); assert(Insts.size() == 2 && "Invalid Insts size"); std::copy(Insts.begin(), Insts.end(), Instrs.begin() + I); I += Insts.size(); - loadReg(Instrs[I++], AArch64::X2, AArch64::SP); - setSystemFlag(Instrs[I++], AArch64::X1); createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1); - if (IsLeaf) - createStackPointerDecrement(Instrs[I++], 128); return Instrs; } @@ -2597,6 +2616,122 @@ public: getInstructionSize(const MCInst &Inst) const override { return 4; } + + std::optional<uint64_t> + extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override { + // Match MOVZ instructions (both X and W register variants) with no shift. 
+ if ((Inst.getOpcode() == AArch64::MOVZXi || + Inst.getOpcode() == AArch64::MOVZWi) && + Inst.getOperand(2).getImm() == 0 && + getAliases(TargetReg)[Inst.getOperand(0).getReg()]) + return Inst.getOperand(1).getImm(); + return std::nullopt; + } + + std::optional<uint64_t> + findMemcpySizeInBytes(const BinaryBasicBlock &BB, + BinaryBasicBlock::iterator CallInst) const override { + MCPhysReg SizeReg = getIntArgRegister(2); + if (SizeReg == getNoRegister()) + return std::nullopt; + + BitVector WrittenRegs(RegInfo->getNumRegs()); + const BitVector &SizeRegAliases = getAliases(SizeReg); + + for (auto InstIt = BB.begin(); InstIt != CallInst; ++InstIt) { + const MCInst &Inst = *InstIt; + WrittenRegs.reset(); + getWrittenRegs(Inst, WrittenRegs); + + if (WrittenRegs.anyCommon(SizeRegAliases)) + return extractMoveImmediate(Inst, SizeReg); + } + return std::nullopt; + } + + InstructionListType + createInlineMemcpy(bool ReturnEnd, + std::optional<uint64_t> KnownSize) const override { + assert(KnownSize.has_value() && + "AArch64 memcpy inlining requires known size"); + InstructionListType Code; + uint64_t Size = *KnownSize; + + generateSizeSpecificMemcpy(Code, Size); + + // If _memcpy8, adjust X0 to return dest+size instead of dest. + if (ReturnEnd) + Code.emplace_back(MCInstBuilder(AArch64::ADDXri) + .addReg(AArch64::X0) + .addReg(AArch64::X0) + .addImm(Size) + .addImm(0)); + return Code; + } + + InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code, + uint64_t Size) const { + auto AddLoadStorePair = [&](unsigned LoadOpc, unsigned StoreOpc, + unsigned Reg, unsigned Offset = 0) { + Code.emplace_back(MCInstBuilder(LoadOpc) + .addReg(Reg) + .addReg(AArch64::X1) + .addImm(Offset)); + Code.emplace_back(MCInstBuilder(StoreOpc) + .addReg(Reg) + .addReg(AArch64::X0) + .addImm(Offset)); + }; + + // Generate optimal instruction sequences based on exact size. 
+ switch (Size) { + case 1: + AddLoadStorePair(AArch64::LDRBBui, AArch64::STRBBui, AArch64::W9); + break; + case 2: + AddLoadStorePair(AArch64::LDRHHui, AArch64::STRHHui, AArch64::W9); + break; + case 4: + AddLoadStorePair(AArch64::LDRWui, AArch64::STRWui, AArch64::W9); + break; + case 8: + AddLoadStorePair(AArch64::LDRXui, AArch64::STRXui, AArch64::X9); + break; + case 16: + AddLoadStorePair(AArch64::LDRQui, AArch64::STRQui, AArch64::Q16); + break; + case 32: + AddLoadStorePair(AArch64::LDRQui, AArch64::STRQui, AArch64::Q16, 0); + AddLoadStorePair(AArch64::LDRQui, AArch64::STRQui, AArch64::Q17, 1); + break; + + default: + // For sizes up to 64 bytes, greedily use the largest possible loads. + // Caller should have already filtered out sizes > 64 bytes. + assert(Size <= 64 && + "Size should be <= 64 bytes for AArch64 memcpy inlining"); + + uint64_t Remaining = Size; + uint64_t Offset = 0; + + const std::array<std::tuple<uint64_t, unsigned, unsigned, unsigned>, 5> + LoadStoreOps = { + {{16, AArch64::LDRQui, AArch64::STRQui, AArch64::Q16}, + {8, AArch64::LDRXui, AArch64::STRXui, AArch64::X9}, + {4, AArch64::LDRWui, AArch64::STRWui, AArch64::W9}, + {2, AArch64::LDRHHui, AArch64::STRHHui, AArch64::W9}, + {1, AArch64::LDRBBui, AArch64::STRBBui, AArch64::W9}}}; + + for (const auto &[OpSize, LoadOp, StoreOp, TempReg] : LoadStoreOps) + while (Remaining >= OpSize) { + AddLoadStorePair(LoadOp, StoreOp, TempReg, Offset / OpSize); + Remaining -= OpSize; + Offset += OpSize; + } + break; + } + return Code; + } }; } // end anonymous namespace |
