diff options
| author | Alexander Yermolovich <ayermolo@meta.com> | 2024-12-18 15:27:40 -0800 |
|---|---|---|
| committer | Amir Ayupov <aaupov@fb.com> | 2024-12-18 15:27:40 -0800 |
| commit | 5cc1a45125e1e3d2d3a874b2f21e06f481dec6bd (patch) | |
| tree | 022a1845c57fad55090a590ff84beb5c13b1d7c4 | |
| parent | 9cdb7d2b6c333874ec969ef6ac64e0354bb3aa91 (diff) | |
[𝘀𝗽𝗿] changes to main this commit is based on
users/aaupov/spr/main.bolt-icf-aware-indirect-call-promotion
Created using spr 1.3.4
[skip ci]
38 files changed, 3016 insertions, 169 deletions
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 6d3b797da378..91918d614a90 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -498,9 +498,12 @@ Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call to hugify is needed in the binary (which is what --hot-text relies on). -- `--icf` +- `--icf=<value>` Fold functions with identical code + - `all`: Enable identical code folding + - `none`: Disable identical code folding (default) + - `safe`: Enable safe identical code folding - `--icp` diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 115e59ca0697..8e061ff38561 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -230,6 +230,12 @@ class BinaryContext { /// Functions injected by BOLT std::vector<BinaryFunction *> InjectedBinaryFunctions; + /// Thunk functions. + std::vector<BinaryFunction *> ThunkBinaryFunctions; + + /// Function that precedes thunks in the binary. + const BinaryFunction *ThunkLocation{nullptr}; + /// Jump tables for all functions mapped by address. std::map<uint64_t, JumpTable *> JumpTables; @@ -541,6 +547,16 @@ public: return InjectedBinaryFunctions; } + BinaryFunction *createThunkBinaryFunction(const std::string &Name); + + std::vector<BinaryFunction *> &getThunkBinaryFunctions() { + return ThunkBinaryFunctions; + } + + const BinaryFunction *getThunkLocation() const { return ThunkLocation; } + + void setThunkLocation(const BinaryFunction *BF) { ThunkLocation = BF; } + /// Return vector with all functions, i.e. include functions from the input /// binary and functions created by BOLT. std::vector<BinaryFunction *> getAllBinaryFunctions(); @@ -1360,6 +1376,10 @@ public: uint64_t computeInstructionSize(const MCInst &Inst, const MCCodeEmitter *Emitter = nullptr) const { + // FIXME: hack for faster size computation on aarch64. 
+ if (isAArch64()) + return MIB->isPseudo(Inst) ? 0 : 4; + if (std::optional<uint32_t> Size = MIB->getSize(Inst)) return *Size; diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 7560908c250c..02fd6329e807 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -360,6 +360,10 @@ private: /// True if another function body was merged into this one. bool HasFunctionsFoldedInto{false}; + /// True if the function is used for remapping hot text and shall not be + /// placed on a huge page. + bool IsHotTextMover{false}; + /// Name for the section this function code should reside in. std::string CodeSectionName; @@ -428,6 +432,9 @@ private: /// Function order for streaming into the destination binary. uint32_t Index{-1U}; + /// Function is referenced by a non-control flow instruction. + bool HasAddressTaken{false}; + /// Get basic block index assuming it belongs to this function. unsigned getIndex(const BinaryBasicBlock *BB) const { assert(BB->getIndex() < BasicBlocks.size()); @@ -822,6 +829,14 @@ public: return nullptr; } + /// Return true if function is referenced in a non-control flow instruction. + /// This flag is set when the code and relocation analyses are being + /// performed, which occurs when safe ICF (Identical Code Folding) is enabled. + bool hasAddressTaken() const { return HasAddressTaken; } + + /// Set whether function is referenced in a non-control flow instruction. + void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; } + /// Returns the raw binary encoding of this function. ErrorOr<ArrayRef<uint8_t>> getData() const; @@ -1368,6 +1383,8 @@ public: /// Return true if the original entry point was patched. 
bool isPatched() const { return IsPatched; } + bool isHotTextMover() const { return IsHotTextMover; } + const JumpTable *getJumpTable(const MCInst &Inst) const { const uint64_t Address = BC.MIB->getJumpTable(Inst); return getJumpTableContainingAddress(Address); @@ -1720,6 +1737,8 @@ public: void setIsPatched(bool V) { IsPatched = V; } + void setHotTextMover(bool V) { IsHotTextMover = V; } + void setHasIndirectTargetToSplitFragment(bool V) { HasIndirectTargetToSplitFragment = V; } @@ -2135,6 +2154,9 @@ public: // adjustments. void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset); + /// Analyze instruction to identify a function reference. + void analyzeInstructionForFuncReference(const MCInst &Inst); + /// Scan function for references to other functions. In relocation mode, /// add relocations for external references. In non-relocation mode, detect /// and mark new entry points. diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index d1acba0f26c7..bd22c536c56f 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -162,7 +162,7 @@ private: /// Clone an attribute in reference format. void cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref); /// Clone an attribute in block format. diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h index 0e61a0e4f9d9..cc4e13a481b2 100644 --- a/bolt/include/bolt/Core/DebugNames.h +++ b/bolt/include/bolt/Core/DebugNames.h @@ -72,8 +72,8 @@ public: return std::move(FullTableBuffer); } /// Adds a DIE that is referenced across CUs. 
- void addCrossCUDie(const DIE *Die) { - CrossCUDies.insert({Die->getOffset(), Die}); + void addCrossCUDie(DWARFUnit *Unit, const DIE *Die) { + CrossCUDies.insert({Die->getOffset(), {Unit, Die}}); } /// Returns true if the DIE can generate an entry for a cross cu reference. /// This only checks TAGs of a DIE because when this is invoked DIE might not @@ -145,7 +145,7 @@ private: llvm::DenseMap<uint64_t, uint32_t> CUOffsetsToPatch; // Contains a map of Entry ID to Entry relative offset. llvm::DenseMap<uint64_t, uint32_t> EntryRelativeOffsets; - llvm::DenseMap<uint64_t, const DIE *> CrossCUDies; + llvm::DenseMap<uint64_t, std::pair<DWARFUnit *, const DIE *>> CrossCUDies; /// Adds Unit to either CUList, LocalTUList or ForeignTUList. /// Input Unit being processed, and DWO ID if Unit is being processed comes /// from a DWO section. @@ -191,6 +191,29 @@ private: void emitData(); /// Emit augmentation string. void emitAugmentationString() const; + /// Creates a new entry for a given DIE. + std::optional<BOLTDWARF5AccelTableData *> + addEntry(DWARFUnit &DU, const DIE &CurrDie, + const std::optional<uint64_t> &DWOID, + const std::optional<BOLTDWARF5AccelTableData *> &Parent, + const std::optional<std::string> &Name, + const uint32_t NumberParentsInChain); + /// Returns UnitID for a given DWARFUnit. + uint32_t getUnitID(const DWARFUnit &Unit, + const std::optional<uint64_t> &DWOID, bool &IsTU); + std::optional<std::string> getName(DWARFUnit &DU, + const std::optional<uint64_t> &DWOID, + const std::string &NameToUse, + DIEValue ValName); + /// Processes a DIE with references to other DIEs for DW_AT_name and + /// DW_AT_linkage_name resolution. + /// If DW_AT_name exists method creates a new entry for this DIE and returns + /// it. 
+ std::optional<BOLTDWARF5AccelTableData *> processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID, + const std::optional<BOLTDWARF5AccelTableData *> &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr); }; } // namespace bolt } // namespace llvm diff --git a/bolt/include/bolt/Passes/IdenticalCodeFolding.h b/bolt/include/bolt/Passes/IdenticalCodeFolding.h index b4206fa36074..f59e75c61860 100644 --- a/bolt/include/bolt/Passes/IdenticalCodeFolding.h +++ b/bolt/include/bolt/Passes/IdenticalCodeFolding.h @@ -11,6 +11,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Passes/BinaryPasses.h" +#include "llvm/ADT/SparseBitVector.h" namespace llvm { namespace bolt { @@ -20,22 +21,72 @@ namespace bolt { /// class IdenticalCodeFolding : public BinaryFunctionPass { protected: - bool shouldOptimize(const BinaryFunction &BF) const override { - if (BF.hasUnknownControlFlow()) - return false; - if (BF.isFolded()) - return false; - if (BF.hasSDTMarker()) - return false; - return BinaryFunctionPass::shouldOptimize(BF); - } + /// Return true if the function is safe to fold. + bool shouldOptimize(const BinaryFunction &BF) const override; public: + enum class ICFLevel { + None, /// No ICF. (Default) + Safe, /// Safe ICF. + All, /// Aggressive ICF. + }; explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass) : BinaryFunctionPass(PrintPass) {} const char *getName() const override { return "identical-code-folding"; } Error runOnFunctions(BinaryContext &BC) override; + +private: + /// Bit vector of memory addresses of vtables. + llvm::SparseBitVector<> VTableBitVector; + + /// Return true if the memory address is in a vtable. + bool isAddressInVTable(uint64_t Address) const { + return VTableBitVector.test(Address / 8); + } + + /// Mark memory address of a vtable as used. 
+ void setAddressUsedInVTable(uint64_t Address) { + VTableBitVector.set(Address / 8); + } + + /// Scan symbol table and mark memory addresses of + /// vtables. + void initVTableReferences(const BinaryContext &BC); + + /// Analyze code section and relocations and mark functions that are not + /// safe to fold. + void markFunctionsUnsafeToFold(BinaryContext &BC); + + /// Process static and dynamic relocations in the data sections to identify + /// function references, and mark them as unsafe to fold. It filters out + /// symbol references that are in vtables. + void analyzeDataRelocations(BinaryContext &BC); + + /// Process functions that have been disassembled and mark functions that are + /// used in non-control flow instructions as unsafe to fold. + void analyzeFunctions(BinaryContext &BC); +}; + +class DeprecatedICFNumericOptionParser + : public cl::parser<IdenticalCodeFolding::ICFLevel> { +public: + explicit DeprecatedICFNumericOptionParser(cl::Option &O) + : cl::parser<IdenticalCodeFolding::ICFLevel>(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, + IdenticalCodeFolding::ICFLevel &Value) { + if (Arg == "0" || Arg == "1") { + Value = (Arg == "0") ? IdenticalCodeFolding::ICFLevel::None + : IdenticalCodeFolding::ICFLevel::All; + errs() << formatv("BOLT-WARNING: specifying numeric value \"{0}\" " + "for option -{1} is deprecated\n", + Arg, ArgName); + return false; + } + return cl::parser<IdenticalCodeFolding::ICFLevel>::parse(O, ArgName, Arg, + Value); + } }; } // namespace bolt diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index df3ea9620918..b434cb1a975c 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -76,6 +76,28 @@ class LongJmpPass : public BinaryFunctionPass { /// 128MB of each other. void relaxLocalBranches(BinaryFunction &BF); + struct FunctionCluster { + std::set<BinaryFunction *> Functions; + + // Functions that this cluster of functions is calling. 
Note that it + // excludes all functions in the cluster itself. + std::set<BinaryFunction *> Callees; + + uint64_t Size{0}; + + // Last function in the cluster. + BinaryFunction *LastBF{nullptr}; + }; + + /// Maximum size of the function cluster. Note that it's less than 128MB + /// as the size of the cluster plus thunk island should be less than 128MB. + static constexpr uint64_t MaxClusterSize = 125 * 1024 * 1024; + + /// Relax calls for medium code model where code is < 256MB. + /// A thunk island will be introduced between two clusters of functions to + /// enable calls over 128MB. + void relaxCalls(BinaryContext &BC); + /// -- Layout estimation methods -- /// Try to do layout before running the emitter, by looking at BinaryFunctions /// and MCInsts -- this is an estimation. To be correct for longjmp inserter diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index ac96b836ed57..1acb287f72e7 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1609,7 +1609,21 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { SortedFunctions.begin(), [](BinaryFunction &BF) { return &BF; }); - llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); + llvm::stable_sort(SortedFunctions, + [](const BinaryFunction *A, const BinaryFunction *B) { + // Place hot text movers at the start. 
+ if (A->isHotTextMover() && !B->isHotTextMover()) + return true; + if (!A->isHotTextMover() && B->isHotTextMover()) + return false; + if (A->hasValidIndex() && B->hasValidIndex()) { + return A->getIndex() < B->getIndex(); + } + if (opts::HotFunctionsAtEnd) + return B->hasValidIndex(); + else + return A->hasValidIndex(); + }); return SortedFunctions; } @@ -2350,6 +2364,15 @@ BinaryContext::createInjectedBinaryFunction(const std::string &Name, return BF; } +BinaryFunction * +BinaryContext::createThunkBinaryFunction(const std::string &Name) { + ThunkBinaryFunctions.push_back(new BinaryFunction(Name, *this, true)); + BinaryFunction *BF = ThunkBinaryFunctions.back(); + setSymbolToFunctionMap(BF->getSymbol(), BF); + BF->CurrentState = BinaryFunction::State::CFG; + return BF; +} + std::pair<size_t, size_t> BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { // Adjust branch instruction to match the current layout. diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index f34a94c57792..a78058146cd0 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -258,6 +258,14 @@ void BinaryEmitter::emitFunctions() { if (Emitted) Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics); + + // Emit thunks. 
+ if (BC.getThunkLocation() != Function) + continue; + + for (BinaryFunction *Thunk : BC.getThunkBinaryFunctions()) { + emitFunction(*Thunk, Thunk->getLayout().getMainFragment()); + } } }; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a9ccaea3c438..8c478b9afc10 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -111,6 +111,10 @@ cl::opt<bool> cl::desc("try to preserve basic block alignment"), cl::cat(BoltOptCategory)); +static cl::opt<bool> PrintOffsets("print-offsets", + cl::desc("print basic block offsets"), + cl::Hidden, cl::cat(BoltOptCategory)); + static cl::opt<bool> PrintOutputAddressRange( "print-output-address-range", cl::desc( @@ -532,6 +536,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { if (BB->isLandingPad()) OS << " Landing Pad\n"; + if (opts::PrintOffsets && BB->getOutputStartAddress()) { + OS << " OutputOffset: 0x" + << Twine::utohexstr(BB->getOutputStartAddress()) << '\n'; + } + uint64_t BBExecCount = BB->getExecutionCount(); if (hasValidProfile()) { OS << " Exec Count : "; @@ -1504,6 +1513,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) { return Target; } +void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) { + for (const MCOperand &Op : MCPlus::primeOperands(Inst)) { + if (!Op.isExpr()) + continue; + const MCExpr &Expr = *Op.getExpr(); + if (Expr.getKind() != MCExpr::SymbolRef) + continue; + const MCSymbol &Symbol = cast<MCSymbolRefExpr>(Expr).getSymbol(); + // Set HasAddressTaken for a function regardless of the ICF level. 
+ if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol)) + BF->setHasAddressTaken(true); + } +} + bool BinaryFunction::scanExternalRefs() { bool Success = true; bool DisassemblyFailed = false; @@ -1624,6 +1647,8 @@ bool BinaryFunction::scanExternalRefs() { [](const MCOperand &Op) { return Op.isExpr(); })) { // Skip assembly if the instruction may not have any symbolic operands. continue; + } else { + analyzeInstructionForFuncReference(Instruction); } // Emit the instruction using temp emitter and generate relocations. diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 414912ea1c20..80ad583e079d 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -622,7 +622,7 @@ DWARFDie DIEBuilder::resolveDIEReference( } void DIEBuilder::cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref) { DIE *NewRefDie = nullptr; DWARFUnit *RefUnit = nullptr; @@ -654,7 +654,7 @@ void DIEBuilder::cloneDieOffsetReferenceAttribute( // Adding referenced DIE to DebugNames to be used when entries are created // that contain cross cu references. if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec)) - DebugNamesTable.addCrossCUDie(DieInfo.Die); + DebugNamesTable.addCrossCUDie(&U, DieInfo.Die); // no matter forward reference or backward reference, we are supposed // to calculate them in `finish` due to the possible modification of // the DIE. 
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index 280c7c505eed..366c22c38e61 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -143,7 +143,8 @@ static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) { Unit.getFormParams().Format); for (const DWARFExpression::Operation &Expr : LocExpr) if (Expr.getCode() == dwarf::DW_OP_addrx || - Expr.getCode() == dwarf::DW_OP_form_tls_address) + Expr.getCode() == dwarf::DW_OP_form_tls_address || + Expr.getCode() == dwarf::DW_OP_GNU_push_tls_address) return true; return false; } @@ -222,134 +223,113 @@ static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) { return reinterpret_cast<uint64_t>(&Entry); } -std::optional<BOLTDWARF5AccelTableData *> -DWARF5AcceleratorTable::addAccelTableEntry( - DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID, - const uint32_t NumberParentsInChain, - std::optional<BOLTDWARF5AccelTableData *> &Parent) { - if (Unit.getVersion() < 5 || !NeedToCreate) - return std::nullopt; - std::string NameToUse = ""; - - auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU, - uint32_t &DieTag) -> uint32_t { - IsTU = Unit.isTypeUnit(); - DieTag = Die.getTag(); - if (IsTU) { - if (DWOID) { - const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash(); - auto Iter = TUHashToIndexMap.find(TUHash); - assert(Iter != TUHashToIndexMap.end() && - "Could not find TU hash in map"); - return Iter->second; - } - return LocalTUList.size() - 1; +uint32_t DWARF5AcceleratorTable::getUnitID(const DWARFUnit &Unit, + const std::optional<uint64_t> &DWOID, + bool &IsTU) { + IsTU = Unit.isTypeUnit(); + if (IsTU) { + if (DWOID) { + const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash(); + auto Iter = TUHashToIndexMap.find(TUHash); + assert(Iter != TUHashToIndexMap.end() && "Could not find TU hash in map"); + return Iter->second; } - return CUList.size() - 1; - }; + return LocalTUList.size() - 1; + } + return 
CUList.size() - 1; +} - if (!canProcess(Unit, Die, NameToUse, false)) +std::optional<std::string> DWARF5AcceleratorTable::getName( + DWARFUnit &Unit, const std::optional<uint64_t> &DWOID, + const std::string &NameToUse, DIEValue ValName) { + if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && + NameToUse.empty()) return std::nullopt; - - // Addes a Unit to either CU, LocalTU or ForeignTU list the first time we - // encounter it. - // Invoking it here so that we don't add Units that don't have any entries. - if (&Unit != CurrentUnit) { - CurrentUnit = &Unit; - addUnit(Unit, DWOID); + std::string Name = ""; + uint64_t NameIndexOffset = 0; + if (NameToUse.empty()) { + NameIndexOffset = ValName.getDIEInteger().getValue(); + if (ValName.getForm() != dwarf::DW_FORM_strp) + NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); + // Counts on strings end with '\0'. + Name = std::string(&StrSection.data()[NameIndexOffset]); + } else { + Name = NameToUse; } - - auto getName = [&](DIEValue ValName) -> std::optional<std::string> { - if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && - NameToUse.empty()) - return std::nullopt; - std::string Name = ""; - uint64_t NameIndexOffset = 0; - if (NameToUse.empty()) { - NameIndexOffset = ValName.getDIEInteger().getValue(); - if (ValName.getForm() != dwarf::DW_FORM_strp) - NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); - // Counts on strings end with '\0'. - Name = std::string(&StrSection.data()[NameIndexOffset]); - } else { - Name = NameToUse; - } - auto &It = Entries[Name]; - if (It.Values.empty()) { - if (DWOID && NameToUse.empty()) { - // For DWO Unit the offset is in the .debug_str.dwo section. - // Need to find offset for the name in the .debug_str section. 
- llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); - auto ItCache = StrCacheToOffsetMap.find(Hash); - if (ItCache == StrCacheToOffsetMap.end()) - NameIndexOffset = MainBinaryStrWriter.addString(Name); - else - NameIndexOffset = ItCache->second; - } - if (!NameToUse.empty()) + auto &It = Entries[Name]; + if (It.Values.empty()) { + if (DWOID && NameToUse.empty()) { + // For DWO Unit the offset is in the .debug_str.dwo section. + // Need to find offset for the name in the .debug_str section. + llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); + auto ItCache = StrCacheToOffsetMap.find(Hash); + if (ItCache == StrCacheToOffsetMap.end()) NameIndexOffset = MainBinaryStrWriter.addString(Name); - It.StrOffset = NameIndexOffset; - // This the same hash function used in DWARF5AccelTableData. - It.HashValue = caseFoldingDjbHash(Name); + else + NameIndexOffset = ItCache->second; } - return Name; - }; + if (!NameToUse.empty()) + NameIndexOffset = MainBinaryStrWriter.addString(Name); + It.StrOffset = NameIndexOffset; + // This is the same hash function used in DWARF5AccelTableData. 
+ It.HashValue = caseFoldingDjbHash(Name); + } + return Name; +} - auto addEntry = - [&](DIEValue ValName) -> std::optional<BOLTDWARF5AccelTableData *> { - std::optional<std::string> Name = getName(ValName); - if (!Name) - return std::nullopt; +std::optional<BOLTDWARF5AccelTableData *> DWARF5AcceleratorTable::addEntry( + DWARFUnit &DU, const DIE &CurrDie, const std::optional<uint64_t> &DWOID, + const std::optional<BOLTDWARF5AccelTableData *> &Parent, + const std::optional<std::string> &Name, + const uint32_t NumberParentsInChain) { + if (!Name) + return std::nullopt; - auto &It = Entries[*Name]; - bool IsTU = false; - uint32_t DieTag = 0; - uint32_t UnitID = getUnitID(Unit, IsTU, DieTag); - std::optional<unsigned> SecondIndex = std::nullopt; - if (IsTU && DWOID) { - auto Iter = CUOffsetsToPatch.find(*DWOID); - if (Iter == CUOffsetsToPatch.end()) - BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " - "DWO ID in CU offsets for second Unit Index " - << *Name << ". For DIE at offset: " - << Twine::utohexstr(CurrentUnitOffset + Die.getOffset()) - << ".\n"; - SecondIndex = Iter->second; - } - std::optional<uint64_t> ParentOffset = - (Parent ? std::optional<uint64_t>(getEntryID(**Parent)) : std::nullopt); - // This will be populated later in writeEntry. - // This way only parent entries get tracked. - // Keeping memory footprint down. - if (ParentOffset) - EntryRelativeOffsets.insert({*ParentOffset, 0}); - bool IsParentRoot = false; - // If there is no parent and no valid Entries in parent chain this is a root - // to be marked with a flag. 
- if (!Parent && !NumberParentsInChain) - IsParentRoot = true; - It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( - Die.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, - SecondIndex)); - return It.Values.back(); - }; + auto &It = Entries[*Name]; + bool IsTU = false; + uint32_t DieTag = CurrDie.getTag(); + uint32_t UnitID = getUnitID(DU, DWOID, IsTU); + std::optional<unsigned> SecondIndex = std::nullopt; + if (IsTU && DWOID) { + auto Iter = CUOffsetsToPatch.find(*DWOID); + if (Iter == CUOffsetsToPatch.end()) + BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " + "DWO ID in CU offsets for second Unit Index " + << *Name << ". For DIE at offset: " + << Twine::utohexstr(CurrentUnitOffset + CurrDie.getOffset()) + << ".\n"; + SecondIndex = Iter->second; + } + std::optional<uint64_t> ParentOffset = + (Parent ? std::optional<uint64_t>(getEntryID(**Parent)) : std::nullopt); + // This will be only populated in writeEntry, in order to keep only the parent + // entries, and keep the footprint down. + if (ParentOffset) + EntryRelativeOffsets.insert({*ParentOffset, 0}); + bool IsParentRoot = false; + // If there is no parent and no valid Entries in parent chain this is a root + // to be marked with a flag. + if (!Parent && !NumberParentsInChain) + IsParentRoot = true; + It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( + CurrDie.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, + SecondIndex)); + return It.Values.back(); +} - // Minor optimization not to add entry twice for DW_TAG_namespace if it has no - // DW_AT_name. 
- if (!(Die.getTag() == dwarf::DW_TAG_namespace && - !Die.findAttribute(dwarf::Attribute::DW_AT_name))) - addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - // For the purposes of determining whether a debugging information entry has a - // particular attribute (such as DW_AT_name), if debugging information entry A - // has a DW_AT_specification or DW_AT_abstract_origin attribute pointing to - // another debugging information entry B, any attributes of B are considered - // to be part of A. - auto processReferencedDie = [&](const dwarf::Attribute &Attr) - -> std::optional<BOLTDWARF5AccelTableData *> { - const DIEValue Value = Die.findAttribute(Attr); +std::optional<BOLTDWARF5AccelTableData *> +DWARF5AcceleratorTable::processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID, + const std::optional<BOLTDWARF5AccelTableData *> &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr) { + DIEValue Value = Die.findAttribute(Attr); + if (!Value) + return std::nullopt; + auto getReferenceDie = [&](const DIEValue &Value, const DIE *RefDieUsed) + -> std::optional<std::pair<DWARFUnit *, const DIE *>> { if (!Value) return std::nullopt; - const DIE *EntryDie = nullptr; if (Value.getForm() == dwarf::DW_FORM_ref_addr) { auto Iter = CrossCUDies.find(Value.getDIEInteger().getValue()); if (Iter == CrossCUDies.end()) { @@ -359,24 +339,97 @@ DWARF5AcceleratorTable::addAccelTableEntry( << ".\n"; return std::nullopt; } - EntryDie = Iter->second; - } else { - const DIEEntry &DIEENtry = Value.getDIEEntry(); - EntryDie = &DIEENtry.getEntry(); + return Iter->second; } - - addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - return addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_name)); + const DIEEntry &DIEENtry = Value.getDIEEntry(); + return {{&Unit, &DIEENtry.getEntry()}}; }; - if (std::optional<BOLTDWARF5AccelTableData *> Entry = - 
processReferencedDie(dwarf::Attribute::DW_AT_abstract_origin)) + DIEValue AttrValLinkageName; + DIEValue AttrValName = Die.findAttribute(dwarf::Attribute::DW_AT_name); + DWARFUnit *RefUnit = &Unit; + const DIE *RefDieUsed = &Die; + // It is possible to have DW_TAG_subprogram only with DW_AT_linkage_name that + // DW_AT_abstract_origin/DW_AT_specification point to. + while (!AttrValName) { + std::optional<std::pair<DWARFUnit *, const DIE *>> RefDUDie = + getReferenceDie(Value, RefDieUsed); + if (!RefDUDie) + break; + RefUnit = RefDUDie->first; + const DIE &RefDie = *RefDUDie->second; + RefDieUsed = &RefDie; + if (!AttrValLinkageName) + AttrValLinkageName = + RefDie.findAttribute(dwarf::Attribute::DW_AT_linkage_name); + AttrValName = RefDie.findAttribute(dwarf::Attribute::DW_AT_name); + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_abstract_origin); + if (!Value) + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_specification); + } + addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValLinkageName), + NumberParentsInChain); + return addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValName), + NumberParentsInChain); +} + +std::optional<BOLTDWARF5AccelTableData *> +DWARF5AcceleratorTable::addAccelTableEntry( + DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID, + const uint32_t NumberParentsInChain, + std::optional<BOLTDWARF5AccelTableData *> &Parent) { + if (Unit.getVersion() < 5 || !NeedToCreate) + return std::nullopt; + std::string NameToUse = ""; + + if (!canProcess(Unit, Die, NameToUse, false)) + return std::nullopt; + + // Adds a Unit to either CU, LocalTU or ForeignTU list the first time we + // encounter it. + // Invoking it here so that we don't add Units that don't have any entries. + if (&Unit != CurrentUnit) { + CurrentUnit = &Unit; + addUnit(Unit, DWOID); + } + + // Minor optimization not to add entry twice for DW_TAG_namespace if it has no + // DW_AT_name. 
+ std::optional<BOLTDWARF5AccelTableData *> LinkageEntry = std::nullopt; + DIEValue NameVal = Die.findAttribute(dwarf::Attribute::DW_AT_name); + DIEValue LinkageNameVal = + Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name); + if (!(Die.getTag() == dwarf::DW_TAG_namespace && !NameVal)) + LinkageEntry = addEntry(Unit, Die, DWOID, Parent, + getName(Unit, DWOID, NameToUse, LinkageNameVal), + NumberParentsInChain); + + std::optional<BOLTDWARF5AccelTableData *> NameEntry = + addEntry(Unit, Die, DWOID, Parent, + getName(Unit, DWOID, NameToUse, NameVal), NumberParentsInChain); + if (NameEntry) + return NameEntry; + + // The DIE doesn't have DW_AT_name or DW_AT_linkage_name, so we need to see if + // we can follow other attributes to find them. For the purposes of + // determining whether a debug information entry has a particular + // attribute (such as DW_AT_name), if debug information entry A has a + // DW_AT_specification or DW_AT_abstract_origin attribute pointing to another + // debug information entry B, any attributes of B are considered to be + // part of A. + if (std::optional<BOLTDWARF5AccelTableData *> Entry = processReferencedDie( + Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain, + dwarf::Attribute::DW_AT_abstract_origin)) return *Entry; - if (std::optional<BOLTDWARF5AccelTableData *> Entry = - processReferencedDie(dwarf::Attribute::DW_AT_specification)) + if (std::optional<BOLTDWARF5AccelTableData *> Entry = processReferencedDie( + Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain, + dwarf::Attribute::DW_AT_specification)) return *Entry; - return addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name)); + // This point can be hit by DW_TAG_variable that has no DW_AT_name. + return std::nullopt; } /// Algorithm from llvm implementation. 
diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index 555f82a5a817..13c7fa130299 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -82,6 +82,11 @@ static void alignCompact(BinaryFunction &Function, size_t HotSize = 0; size_t ColdSize = 0; + if (!Function.hasProfile() && BC.isAArch64()) { + Function.setAlignment(Function.getMinAlignment()); + return; + } + for (const BinaryBasicBlock &BB : Function) if (BB.isSplit()) ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter); diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 03d3dd75a033..b4c15443e57a 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1285,6 +1285,8 @@ Error AssignSections::runOnFunctions(BinaryContext &BC) { if (opts::isHotTextMover(Function)) { Function.setCodeSectionName(BC.getHotTextMoverSectionName()); Function.setColdCodeSectionName(BC.getHotTextMoverSectionName()); + // TODO: find a better place to mark a function as a mover. 
+ Function.setHotTextMover(true); continue; } diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp index 38e080c9dd62..8923562776cc 100644 --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -15,6 +15,7 @@ #include "bolt/Core/ParallelUtilities.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Timer.h" #include <atomic> @@ -42,8 +43,41 @@ TimeICF("time-icf", cl::ReallyHidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); + +cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, + DeprecatedICFNumericOptionParser> + ICF("icf", cl::desc("fold functions with identical code"), + cl::init(bolt::IdenticalCodeFolding::ICFLevel::None), + cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, + "none", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, + "safe", "Enable safe identical code folding")), + cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory)); } // namespace opts +bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const { + if (BF.hasUnknownControlFlow()) + return false; + if (BF.isFolded()) + return false; + if (BF.hasSDTMarker()) + return false; + if (BF.isPseudo()) + return false; + if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken()) + return false; + return BinaryFunctionPass::shouldOptimize(BF); +} + /// Compare two jump tables in 2 functions. 
The function relies on consistent /// ordering of basic blocks in both binary functions (e.g. DFS). static bool equalJumpTables(const JumpTable &JumpTableA, @@ -340,6 +374,74 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, namespace llvm { namespace bolt { +void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { + for (const auto &[Address, Data] : BC.getBinaryData()) { + // Filter out all symbols that are not vtables. + if (!Data->getName().starts_with("_ZTV")) + continue; + for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) + setAddressUsedInVTable(I); + } +} + +void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) { + initVTableReferences(BC); + // For static relocations there should be a symbol for function references. + for (const BinarySection &Sec : BC.sections()) { + if (!Sec.hasSectionRef() || !Sec.isData()) + continue; + for (const auto &Rel : Sec.relocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getFunctionForSymbol(Rel.Symbol)) + BF->setHasAddressTaken(true); + } + // For dynamic relocations there are two cases: + // 1: No symbol and only addend. + // 2: There is a symbol, but it does not reference a function in the binary. 
+ for (const auto &Rel : Sec.dynamicRelocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Rel.Addend)) + BF->setHasAddressTaken(true); + } + } +} + +void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) { + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + for (const BinaryBasicBlock &BB : BF) + for (const MCInst &Inst : BB) + if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst))) + BF.analyzeInstructionForFuncReference(Inst); + }; + ParallelUtilities::PredicateTy SkipFunc = + [&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); }; + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipFunc, "markUnsafe"); + + LLVM_DEBUG({ + for (const auto &BFIter : BC.getBinaryFunctions()) { + if (!BFIter.second.hasAddressTaken()) + continue; + dbgs() << "BOLT-DEBUG: skipping function with reference taken " + << BFIter.second.getOneName() << '\n'; + } + }); +} + +void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { + NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( + "markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown", + "ICF breakdown", opts::TimeICF); + if (!BC.isX86()) + BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n"; + analyzeDataRelocations(BC); + analyzeFunctions(BC); +} Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); @@ -385,7 +487,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { "ICF breakdown", opts::TimeICF); for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - if (!this->shouldOptimize(BF)) + if (!shouldOptimize(BF)) continue; CongruentBuckets[&BF].emplace(&BF); } @@ -475,7 +577,8 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 
LLVM_DEBUG(SinglePass.stopTimer()); }; - + if (opts::ICF == ICFLevel::Safe) + markFunctionsUnsafeToFold(BC); hashFunctions(); createCongruentBuckets(); diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index c1b8c03324e0..191afe6374bc 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -31,6 +31,11 @@ static cl::opt<bool> cl::desc("generate code for binaries <128MB on AArch64"), cl::init(false), cl::cat(BoltCategory)); +static cl::opt<bool> + ExperimentalRelaxation("relax-exp", + cl::desc("run experimental relaxation pass"), + cl::init(false), cl::cat(BoltOptCategory)); + static cl::opt<bool> GroupStubs("group-stubs", cl::desc("share stubs across functions"), cl::init(true), cl::cat(BoltOptCategory)); @@ -897,12 +902,183 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) { } } +void LongJmpPass::relaxCalls(BinaryContext &BC) { + // Map every function to its direct callees. Note that this is different from + // a typical call graph as we completely ignore indirect calls. + uint64_t EstimatedSize = 0; + // Conservatively estimate emitted function size. 
+ auto estimateFunctionSize = [&](const BinaryFunction &BF) -> uint64_t { + if (!BC.shouldEmit(BF)) + return 0; + uint64_t Size = BF.estimateSize(); + if (BF.hasValidIndex()) + Size += BF.getAlignment(); + if (BF.hasIslandsInfo()) { + Size += BF.estimateConstantIslandSize(); + Size += BF.getConstantIslandAlignment(); + } + + return Size; + }; + + std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>> CallMap; + for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { + if (!BC.shouldEmit(BF)) + continue; + + EstimatedSize += estimateFunctionSize(BF); + + for (const BinaryBasicBlock &BB : BF) { + for (const MCInst &Inst : BB) { + if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) || + BC.MIB->isIndirectBranch(Inst)) + continue; + const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst); + assert(TargetSymbol); + + BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol); + if (!Callee) { + /* Ignore internal calls */ + continue; + } + + CallMap[&BF].insert(Callee); + } + } + } + + LLVM_DEBUG(dbgs() << "LongJmp: estimated code size : " << EstimatedSize + << '\n'); + + // Build clusters in the order the functions will appear in the output. + std::vector<FunctionCluster> Clusters; + Clusters.emplace_back(FunctionCluster()); + + for (BinaryFunction *BF : BC.getSortedFunctions()) { + if (!BC.shouldEmit(*BF)) + continue; + + const uint64_t BFSize = estimateFunctionSize(*BF); + if (Clusters.empty() || Clusters.back().Size + BFSize > MaxClusterSize) { + Clusters.emplace_back(FunctionCluster()); + } + + FunctionCluster &FC = Clusters.back(); + FC.Functions.insert(BF); + auto It = FC.Callees.find(BF); + if (It != FC.Callees.end()) { + FC.Callees.erase(It); + } + FC.Size += BFSize; + FC.LastBF = BF; + + for (BinaryFunction *Callee : CallMap[BF]) + if (!FC.Functions.count(Callee)) + FC.Callees.insert(Callee); + } + + // Print cluster stats. 
+ dbgs() << "Built " << Clusters.size() << " clusters\n"; + uint64_t Index = 0; + for (const FunctionCluster &FC : Clusters) { + dbgs() << " Cluster: " << Index++ << '\n'; + dbgs() << " " << FC.Functions.size() << " functions\n"; + dbgs() << " " << FC.Callees.size() << " callees\n"; + dbgs() << " " << FC.Size << " bytes\n"; + } + + if (Clusters.size() > 2) { + BC.errs() << "Large code model is unsupported\n"; + exit(1); + } + + if (Clusters.size() == 1) + return; + + // Populate one of the clusters with PLT functions based on the proximity of + // the PLT section to avoid unneeded thunk redirection. + // FIXME: this part is extremely fragile as it depends on the placement + // of PLT section and its proximity to old or new .text. + // FIXME: a slightly better approach will be to always use thunks for PLT and + // eliminate redirection later using final addresses in address maps. + const size_t PLTClusterNum = opts::UseOldText ? 1 : 0; + for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { + if (BF.isPLTFunction()) { + auto &PLTCluster = Clusters[PLTClusterNum]; + PLTCluster.Functions.insert(&BF); + auto It = PLTCluster.Callees.find(&BF); + if (It != PLTCluster.Callees.end()) + PLTCluster.Callees.erase(It); + } + } + + // FIXME: section name to use for thunks. + std::string SectionName = + Clusters[0].LastBF->getCodeSectionName().str().str(); + + // Build thunk functions. 
+ auto createSmallThunk = [&](BinaryFunction &Callee) { + BinaryFunction *ThunkBF = + BC.createThunkBinaryFunction("__BThunk__" + Callee.getOneName().str()); + MCInst Inst; + BC.MIB->createTailCall(Inst, Callee.getSymbol(), BC.Ctx.get()); + ThunkBF->addBasicBlock()->addInstruction(Inst); + ThunkBF->setCodeSectionName(SectionName); + + return ThunkBF; + }; + + DenseMap<BinaryFunction *, BinaryFunction *> Thunks; + for (const FunctionCluster &FC : Clusters) { + for (BinaryFunction *Callee : FC.Callees) { + Thunks[Callee] = createSmallThunk(*Callee); + } + } + + BC.outs() << "BOLT-INFO: " << Thunks.size() << " thunks created\n"; + + // Replace callees with thunks. + for (FunctionCluster &FC : Clusters) { + for (BinaryFunction *BF : FC.Functions) { + if (!CallMap.count(BF)) + continue; + + for (BinaryBasicBlock &BB : *BF) { + for (MCInst &Inst : BB) { + if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) || + BC.MIB->isIndirectBranch(Inst)) + continue; + const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst); + assert(TargetSymbol); + + BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol); + if (!Callee) { + /* Ignore internal calls */ + continue; + } + + // Check if the callee is in the same cluster. + if (!FC.Callees.count(Callee)) + continue; + + // Use thunk as the call destination. + BC.MIB->replaceBranchTarget(Inst, Thunks[Callee]->getSymbol(), + BC.Ctx.get()); + } + } + } + } + + BC.setThunkLocation(Clusters[0].LastBF); +} + Error LongJmpPass::runOnFunctions(BinaryContext &BC) { - if (opts::CompactCodeModel) { + if (opts::CompactCodeModel || opts::ExperimentalRelaxation) { BC.outs() << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n"; + // TODO: set correct code model based on the total size of split-code. 
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { relaxLocalBranches(BF); }; @@ -916,6 +1092,12 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) { BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, SkipPredicate, "RelaxLocalBranches"); + if (!opts::ExperimentalRelaxation) + return Error::success(); + + BC.outs() << "BOLT-INFO: starting experimental relaxation pass\n"; + relaxCalls(BC); + return Error::success(); } diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 981d1b70af90..1530d1076bb0 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++ b/bolt/lib/Passes/PatchEntries.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Passes/PatchEntries.h" +#include "bolt/Utils/CommandLineOpts.h" #include "bolt/Utils/NameResolver.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" @@ -35,16 +36,20 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { if (!opts::ForcePatch) { // Mark the binary for patching if we did not create external references // for original code in any of functions we are not going to emit. 
- bool NeedsPatching = llvm::any_of( - llvm::make_second_range(BC.getBinaryFunctions()), - [&](BinaryFunction &BF) { - return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations(); - }); + bool NeedsPatching = + llvm::any_of(llvm::make_second_range(BC.getBinaryFunctions()), + [&](BinaryFunction &BF) { + return !BF.isPseudo() && !BC.shouldEmit(BF) && + !BF.hasExternalRefRelocations(); + }); if (!NeedsPatching) return Error::success(); } + assert(!opts::UseOldText && + "Cannot patch entries while overwriting original .text"); + if (opts::Verbosity >= 1) BC.outs() << "BOLT-INFO: patching entries in original code\n"; diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index b09060418334..d823503d2f9c 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -54,6 +54,9 @@ extern cl::opt<bool> PrintDynoStats; extern cl::opt<bool> DumpDotAll; extern cl::opt<std::string> AsmDump; extern cl::opt<bolt::PLTCall::OptType> PLT; +extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, + llvm::bolt::DeprecatedICFNumericOptionParser> + ICF; static cl::opt<bool> DynoStatsAll("dyno-stats-all", @@ -65,9 +68,6 @@ static cl::opt<bool> cl::desc("eliminate unreachable code"), cl::init(true), cl::cat(BoltOptCategory)); -cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), - cl::cat(BoltOptCategory)); - static cl::opt<bool> JTFootprintReductionFlag( "jt-footprint-reduction", cl::desc("make jump tables size smaller at the cost of using more " @@ -398,7 +398,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { opts::StripRepRet); Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass( std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), @@ -423,7 +423,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 
Manager.registerPass(std::make_unique<Inliner>(PrintInline)); Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); @@ -492,6 +492,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { // memory profiling data. Manager.registerPass(std::make_unique<ReorderData>()); + // Assign each function an output section. + Manager.registerPass(std::make_unique<AssignSections>()); + if (BC.isAArch64()) { Manager.registerPass(std::make_unique<ADRRelaxationPass>()); @@ -515,9 +518,6 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass( std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); - // Assign each function an output section. - Manager.registerPass(std::make_unique<AssignSections>()); - // Patch original function entries if (BC.HasRelocations) Manager.registerPass(std::make_unique<PatchEntries>()); diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp index 74b5ca18abce..35f671050664 100644 --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -28,7 +28,9 @@ using namespace bolt; namespace opts { extern cl::OptionCategory BoltDiffCategory; extern cl::opt<bool> NeverPrint; -extern cl::opt<bool> ICF; +extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, + llvm::bolt::DeprecatedICFNumericOptionParser> + ICF; static cl::opt<bool> IgnoreLTOSuffix( "ignore-lto-suffix", @@ -697,7 +699,7 @@ void RewriteInstance::compare(RewriteInstance &RI2) { } // Pre-pass ICF - if (opts::ICF) { + if (opts::ICF != IdenticalCodeFolding::ICFLevel::None) { IdenticalCodeFolding ICF(opts::NeverPrint); outs() << "BOLT-DIFF: Starting ICF pass for binary 1"; BC->logBOLTErrorsAndQuitOnFatal(ICF.runOnFunctions(*BC)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 76e1f0156f82..80ecbcc94282 100644 --- 
a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -19,6 +19,7 @@ #include "bolt/Core/Relocation.h" #include "bolt/Passes/BinaryPasses.h" #include "bolt/Passes/CacheMetrics.h" +#include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/ReorderFunctions.h" #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/DataAggregator.h" @@ -86,6 +87,9 @@ extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; extern cl::opt<bool> TerminalTrap; extern cl::opt<bool> TimeBuild; extern cl::opt<bool> TimeRewrite; +extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, + llvm::bolt::DeprecatedICFNumericOptionParser> + ICF; cl::opt<bool> AllowStripped("allow-stripped", cl::desc("allow processing of stripped binaries"), @@ -2051,6 +2055,13 @@ void RewriteInstance::adjustCommandLineOptions() { exit(1); } + if (!BC->HasRelocations && + opts::ICF == IdenticalCodeFolding::ICFLevel::Safe) { + BC->errs() << "BOLT-ERROR: binary built without relocations. Safe ICF is " + "not supported\n"; + exit(1); + } + if (opts::Instrument || (opts::ReorderFunctions != ReorderFunctions::RT_NONE && !opts::HotText.getNumOccurrences())) { @@ -2424,6 +2435,13 @@ void RewriteInstance::readDynamicRelocations(const SectionRef &Section, if (Symbol) SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel); + // Workaround for AArch64 issue with hot text. + if (BC->isAArch64() && (SymbolName == "__hot_start" || + SymbolName == "__hot_end")) { + BC->addRelocation(Rel.getOffset(), Symbol, ELF::R_AARCH64_ABS64, Addend); + continue; + } + BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend); } } @@ -3777,15 +3795,41 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) { return Address; }; + // Try to allocate sections before the \p Address and return an address for + // the allocation of the first section or 0 if \p Address is not big enough. 
+ auto allocateBefore = [&](uint64_t Address) -> uint64_t { + for (auto SI = CodeSections.rbegin(), SE = CodeSections.rend(); SI != SE; + ++SI) { + BinarySection *Section = *SI; + if (Section->getOutputSize() > Address) + return 0; + Address -= Section->getOutputSize(); + Address = alignDown(Address, Section->getAlignment()); + Section->setOutputAddress(Address); + } + return Address; + }; + // Check if we can fit code in the original .text bool AllocationDone = false; if (opts::UseOldText) { - const uint64_t CodeSize = - allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; + uint64_t StartAddress; + uint64_t EndAddress; + if (opts::HotFunctionsAtEnd) { + EndAddress = BC->OldTextSectionAddress + BC->OldTextSectionSize; + StartAddress = allocateBefore(EndAddress); + } else { + StartAddress = BC->OldTextSectionAddress; + EndAddress = allocateAt(BC->OldTextSectionAddress); + } + const uint64_t CodeSize = EndAddress - StartAddress; if (CodeSize <= BC->OldTextSectionSize) { BC->outs() << "BOLT-INFO: using original .text for new code with 0x" - << Twine::utohexstr(opts::AlignText) << " alignment\n"; + << Twine::utohexstr(opts::AlignText) << " alignment"; + if (StartAddress != BC->OldTextSectionAddress) + BC->outs() << " at 0x" << Twine::utohexstr(StartAddress); + BC->outs() << '\n'; AllocationDone = true; } else { BC->errs() @@ -4062,6 +4106,11 @@ void RewriteInstance::patchELFPHDRTable() { NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress; } + if (!NewTextSegmentSize && !NewWritableSegmentSize) { + BC->outs() << "BOLT-INFO: not adding new segments\n"; + return; + } + const uint64_t SavedPos = OS.tell(); OS.seek(PHDRTableOffset); diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s new file mode 100644 index 000000000000..8c9817ce91ed --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s @@ -0,0 
+1,568 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## Tests that bolt can correctly generate debug_names when there is an DW_TAG_inlined_subroutine +## with DW_AT_abstract_origin that points to DW_TAG_subprogram that only has DW_AT_linkage_name. + +# BOLT: Name Index @ 0x0 { +# BOLT-NEXT: Header { +# BOLT-NEXT: Length: 0xA2 +# BOLT-NEXT: Format: DWARF32 +# BOLT-NEXT: Version: 5 +# BOLT-NEXT: CU count: 1 +# BOLT-NEXT: Local TU count: 0 +# BOLT-NEXT: Foreign TU count: 0 +# BOLT-NEXT: Bucket count: 4 +# BOLT-NEXT: Name count: 4 +# BOLT-NEXT: Abbreviations table size: 0x19 +# BOLT-NEXT: Augmentation: 'BOLT' +# BOLT-NEXT: } +# BOLT-NEXT: Compilation Unit offsets [ +# BOLT-NEXT: CU[0]: 0x00000000 +# BOLT-NEXT: ] +# BOLT-NEXT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 [ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: 0x1 +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004a +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: 
} +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004e +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB5063CFE +# BOLT-NEXT: String: {{.+}} "_Z3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000024 +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ 0x96 { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x0000007e +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000009f +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> + +## int foo(int i) { +## return i ++; +## } +## int main(int argc, char* argv[]) { +## int i = 0; +## [[clang::always_inline]] i = foo(argc); +## return i; +## } +## Test was manually modified so that DW_TAG_subprogram only had DW_AT_linkage_name. 
+ + .text + .file "main.cpp" + .globl _Z3fooi + .p2align 4, 0x90 + .type _Z3fooi,@function +_Z3fooi: +.Lfunc_begin0: + .file 0 "/abstractChain" "main.cpp" md5 0x2e29d55fc1320801a8057a4c50643ea1 + .loc 0 1 0 + .loc 0 2 12 prologue_end + .loc 0 2 3 epilogue_begin is_stmt 0 + retq +.Lfunc_end0: + .size _Z3fooi, .Lfunc_end0-_Z3fooi + + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin1: + .loc 0 4 0 is_stmt 1 +.Ltmp2: + .loc 0 5 7 prologue_end + .loc 0 6 36 + movl -12(%rbp), %eax +.Ltmp3: + .loc 0 2 12 +.Ltmp4: + .loc 0 6 30 + .loc 0 7 10 + .loc 0 7 3 epilogue_begin is_stmt 0 + retq +.Ltmp5: +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + 
.byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + #.byte 3 # DW_AT_name + #.byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # 
DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x98 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x15 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 56 # DW_AT_abstract_origin + .byte 3 # Abbrev [3] 0x2f:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x38:0x12 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + #.byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + # DW_AT_external + # DW_AT_inline + .byte 5 # Abbrev [5] 0x41:0x8 DW_TAG_formal_parameter + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x4a:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x4e:0x47 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + # DW_AT_external + .byte 8 # Abbrev [8] 0x5d:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 8 # Abbrev [8] 0x68:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 104 + .byte 9 # DW_AT_name + .byte 0 # 
DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 148 # DW_AT_type Manually Modified + .byte 9 # Abbrev [9] 0x73:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 100 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 10 # Abbrev [10] 0x7e:0x16 DW_TAG_inlined_subroutine + .long 56 # DW_AT_abstract_origin + .byte 2 # DW_AT_low_pc + .long .Ltmp4-.Ltmp3 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 3 # Abbrev [3] 0x8b:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x95:0x5 DW_TAG_pointer_type + .long 153 # DW_AT_type Manually Modified + .byte 11 # Abbrev [11] 0x9a:0x5 DW_TAG_pointer_type + .long 158 # DW_AT_type Manually Modified + .byte 6 # Abbrev [6] 0x9f:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "/abstractChain" # string offset=33 +.Linfo_string3: + .asciz "foo" # string offset=85 +.Linfo_string4: + .asciz "_Z3fooi" # string offset=89 +.Linfo_string5: + .asciz "int" # string offset=97 +.Linfo_string6: + .asciz "i" # string offset=101 +.Linfo_string7: + .asciz "main" # string offset=103 +.Linfo_string8: + .asciz "argc" # string offset=108 +.Linfo_string9: + .asciz "argv" # string offset=113 +.Linfo_string10: + .asciz "char" # string offset=118 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + 
.long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string4 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp3 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 5 # Header: bucket count + .long 5 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 0 # Bucket 0 + .long 1 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 4 # Bucket 4 + .long 2090499946 # Hash in Bucket 1 + .long -1257882370 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 4 + .long 2090147939 # Hash in Bucket 4 + .long .Linfo_string7 # String in Bucket 1: main + .long .Linfo_string4 # String in Bucket 1: _Z3fooi + .long .Linfo_string5 # String in Bucket 3: int + .long .Linfo_string3 # String in Bucket 4: foo + .long .Linfo_string10 # String in Bucket 4: char + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 4 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 4 +.Lnames_abbrev_start0: + .byte 1 # Abbrev 
code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames3: +.L2: + .byte 1 # Abbreviation code + .long 78 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames1: +.L0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset +.L3: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: _Z3fooi +.Lnames2: +.L1: + .byte 3 # Abbreviation code + .long 74 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset + .byte 2 # DW_IDX_parent + # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: foo +.Lnames4: +.L4: + .byte 3 # Abbreviation code + .long 159 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s new file mode 100644 index 000000000000..2075640d6761 --- /dev/null +++ 
b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s @@ -0,0 +1,829 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## This test checks that BOLT correctly generates .debug_names section when there is transitive +## DW_AT_name/DW_AT_linkage_name resolution. + +# BOLT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_class_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 [ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xD72418AA +# BOLT-NEXT: String: {{.+}} "_ZL3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x10614A06 +# BOLT-NEXT: String: {{.+}} "State" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: 
DW_TAG_class_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000002b +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x00000085 +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000042 +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 4 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 5 [ +# BOLT-NEXT: Name 5 { +# BOLT-NEXT: Hash: 0xB887389 +# BOLT-NEXT: String: {{.+}} "foo" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 6 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x000000d9 +# BOLT-NEXT: 
DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 7 { +# BOLT-NEXT: Hash: 0xFBBDC812 +# BOLT-NEXT: String: {{.+}} "_ZN5StateC2Ev" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] + +## static int foo(int i) { +## return i ++; +## } +## class State { +## public: +## State() {[[clang::always_inline]] foo(3);} +## }; +## +## int main(int argc, char* argv[]) { +## State S; +## return 0; +## } + +## Test manually modified to redirect DW_TAG_inlined_subroutine to DW_TAG_subprogram with DW_AT_specification. + + .text + .file "main.cpp" + .file 0 "abstractChainTwo" "main.cpp" md5 0x17ad726b6a1fd49ee59559a1302da539 + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .loc 0 9 0 # main.cpp:9:0 +.Ltmp0: + .loc 0 10 9 prologue_end # main.cpp:10:9 + callq _ZN5StateC2Ev + .loc 0 11 3 # main.cpp:11:3 + .loc 0 11 3 epilogue_begin is_stmt 0 # main.cpp:11:3 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + # -- End function + .section .text._ZN5StateC2Ev,"axG",@progbits,_ZN5StateC2Ev,comdat + .weak _ZN5StateC2Ev # -- Begin function _ZN5StateC2Ev + .p2align 4, 0x90 + .type _ZN5StateC2Ev,@function +_ZN5StateC2Ev: # @_ZN5StateC2Ev +.Lfunc_begin1: + .loc 0 6 0 is_stmt 1 # main.cpp:6:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -16(%rbp) + movl $3, -4(%rbp) +.Ltmp2: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: + .loc 0 6 44 epilogue_begin # 
main.cpp:6:44 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end1: + .size _ZN5StateC2Ev, .Lfunc_end1-_ZN5StateC2Ev + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin function _ZL3fooi + .type _ZL3fooi,@function +_ZL3fooi: # @_ZL3fooi +.Lfunc_begin2: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp5: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + movl %eax, %ecx + addl $1, %ecx + movl %ecx, -4(%rbp) + .loc 0 2 3 epilogue_begin is_stmt 0 # main.cpp:2:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end2: + .size _ZL3fooi, .Lfunc_end2-_ZL3fooi + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 2 # DW_TAG_class_type + .byte 1 # DW_CHILDREN_yes + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # 
DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 50 # DW_AT_accessibility + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # 
DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 100 # DW_AT_object_pointer + .byte 19 # DW_FORM_ref4 + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 
# DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 15 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 16 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0xd7 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x12 DW_TAG_class_type + .byte 5 # DW_AT_calling_convention + .byte 3 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 3 # Abbrev [3] 0x31:0xb DW_TAG_subprogram + .byte 3 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 1 # DW_AT_accessibility + # DW_ACCESS_public + .byte 4 # Abbrev [4] 0x36:0x5 DW_TAG_formal_parameter + .long 61 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x3d:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 6 # Abbrev [6] 0x42:0x31 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_external + .byte 7 # Abbrev [7] 0x51:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 10 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 7 # Abbrev [7] 0x5c:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 11 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 207 # DW_AT_type + .byte 8 # Abbrev [8] 0x67:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 111 + .byte 13 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 10 # DW_AT_decl_line + .long 43 # DW_AT_type + .byte 
0 # End Of Children Mark + .byte 9 # Abbrev [9] 0x73:0x12 DW_TAG_subprogram + .byte 4 # DW_AT_linkage_name + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_inline + .byte 10 # Abbrev [10] 0x7c:0x8 DW_TAG_formal_parameter + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x85:0x4 DW_TAG_base_type + .byte 6 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 12 # Abbrev [12] 0x89:0x31 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 154 # DW_AT_object_pointer + .byte 9 # DW_AT_linkage_name + .long 49 # DW_AT_specification + .byte 13 # Abbrev [13] 0x9a:0x9 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 14 # DW_AT_name + .long 221 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0xa3:0x16 DW_TAG_inlined_subroutine + .long 137 # DW_AT_abstract_origin Manually Modified + .byte 2 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 37 # DW_AT_call_column + .byte 15 # Abbrev [15] 0xb0:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 16 # Abbrev [16] 0xba:0x15 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 115 # DW_AT_abstract_origin + .byte 15 # Abbrev [15] 0xc6:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0xcf:0x5 DW_TAG_pointer_type + .long 212 # DW_AT_type + .byte 5 # Abbrev [5] 0xd4:0x5 DW_TAG_pointer_type + .long 217 # 
DW_AT_type + .byte 11 # Abbrev [11] 0xd9:0x4 DW_TAG_base_type + .byte 12 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0xdd:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 1 # DW_RLE_base_addressx + .byte 0 # base address index + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin2-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end2-.Lfunc_begin0 # ending offset + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 64 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "abstractChainTwo" # string offset=33 +.Linfo_string3: + .asciz "State" # string offset=88 +.Linfo_string4: + .asciz "main" # string offset=94 +.Linfo_string5: + .asciz "_ZL3fooi" # string offset=99 +.Linfo_string6: + .asciz "foo" # string offset=108 +.Linfo_string7: + .asciz "int" # string offset=112 +.Linfo_string8: + .asciz "i" # string offset=116 +.Linfo_string9: + .asciz "_ZN5StateC2Ev" # string offset=118 +.Linfo_string10: + .asciz "argc" # string offset=132 +.Linfo_string11: + .asciz "argv" # string offset=137 +.Linfo_string12: + .asciz "char" # 
string offset=142 +.Linfo_string13: + .asciz "S" # string offset=147 +.Linfo_string14: + .asciz "this" # string offset=149 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string4 + .long .Linfo_string9 + .long .Linfo_string10 + .long .Linfo_string11 + .long .Linfo_string12 + .long .Linfo_string13 + .long .Linfo_string14 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp2 + .quad .Lfunc_begin2 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 7 # Header: bucket count + .long 7 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 1 # Bucket 0 + .long 2 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 0 # Bucket 4 + .long 5 # Bucket 5 + .long 0 # Bucket 6 + .long -685500246 # Hash in Bucket 0 + .long 274811398 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 2090499946 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 5 + .long 2090147939 # Hash in Bucket 5 + .long -71448558 # Hash in Bucket 5 + .long .Linfo_string5 # String in Bucket 0: _ZL3fooi + .long .Linfo_string3 # String in Bucket 1: State + .long .Linfo_string7 # String in Bucket 
3: int + .long .Linfo_string4 # String in Bucket 3: main + .long .Linfo_string6 # String in Bucket 5: foo + .long .Linfo_string12 # String in Bucket 5: char + .long .Linfo_string9 # String in Bucket 5: _ZN5StateC2Ev + .long .Lnames5-.Lnames_entries0 # Offset in Bucket 0 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames6-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 5 +.Lnames_abbrev_start0: + .byte 1 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 2 # DW_TAG_class_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 4 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames5: +.L1: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent +.L0: + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZL3fooi +.Lnames0: +.L5: + .byte 3 # Abbreviation code + .long 43 # DW_IDX_die_offset +.L2: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + 
.byte 0 # DW_IDX_parent + # End of list: State +.Lnames2: +.L4: + .byte 4 # Abbreviation code + .long 133 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames1: +.L6: + .byte 2 # Abbreviation code + .long 66 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames4: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: foo +.Lnames6: +.L3: + .byte 4 # Abbreviation code + .long 217 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char +.Lnames3: + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZN5StateC2Ev + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-debug-names-gnu-push-tls-address.s b/bolt/test/X86/dwarf5-debug-names-gnu-push-tls-address.s new file mode 100644 index 000000000000..3f6ce71a7994 --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-names-gnu-push-tls-address.s @@ -0,0 +1,327 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names --debug-info %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## This test checks that BOLT correctly generates .debug_names section when there is DW_TAG_variable +## with DW_OP_GNU_push_tls_address in DW_AT_location. 
+ +# BOLT: [[DIEOFFSET:0x[0-9a-f]*]]: DW_TAG_variable +# BOLT-NEXT: DW_AT_name ("x") +# BOLT-NEXT: DW_AT_type ({{.+}} "int") +# BOLT-NEXT: DW_AT_external (true) +# BOLT-NEXT: DW_AT_decl_file ("gnu_tls_push/main.cpp") +# BOLT-NEXT: DW_AT_decl_line (1) +# BOLT-NEXT: DW_AT_location (DW_OP_const8u 0x0, DW_OP_GNU_push_tls_address) +# BOLT: Hash: 0x2B61D +# BOLT-NEXT: String: {{.+}} "x" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: {{.+}} +# BOLT-NEXT: Tag: DW_TAG_variable +# BOLT-NEXT: DW_IDX_die_offset: [[DIEOFFSET]] +# BOLT-NEXT: DW_IDX_parent: <parent not indexed> + +## thread_local int x = 0; +## int main() { +## x = 10; +## return x; +## } + .text + .file "main.cpp" + .file 0 "gnu_tls_push" "main.cpp" md5 0x551db97d5e23dc6a81abdc5ade4d9d71 + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .loc 0 2 0 # main.cpp:2:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl $0, -4(%rbp) +.Ltmp0: + .loc 0 3 3 prologue_end # main.cpp:3:3 + movq %fs:0, %rax + leaq x@TPOFF(%rax), %rax + .loc 0 3 5 is_stmt 0 # main.cpp:3:5 + movl $10, (%rax) + .loc 0 4 10 is_stmt 1 # main.cpp:4:10 + movq %fs:0, %rax + leaq x@TPOFF(%rax), %rax + movl (%rax), %eax + .loc 0 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .text._ZTW1x,"axG",@progbits,_ZTW1x,comdat + .hidden _ZTW1x # -- Begin function _ZTW1x + .weak _ZTW1x + .p2align 4, 0x90 + .type _ZTW1x,@function +_ZTW1x: # @_ZTW1x +.Lfunc_begin1: + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %fs:0, %rax + leaq x@TPOFF(%rax), %rax + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Lfunc_end1: + .size _ZTW1x, .Lfunc_end1-_ZTW1x + .cfi_endproc + # -- End 
function + .type x,@object # @x + .section .tbss,"awT",@nobits + .globl x + .p2align 2, 0x0 +x: + .long 0 # 0x0 + .size x, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # 
DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x3e DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x13 DW_TAG_variable + .byte 3 # DW_AT_name + .long 54 # DW_AT_type + # DW_AT_external + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .byte 10 # DW_AT_location + .byte 14 + .quad x@DTPOFF + .byte 224 + .byte 3 # Abbrev [3] 0x36:0x4 DW_TAG_base_type + .byte 4 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 4 # Abbrev [4] 0x3a:0xf DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .long 54 # DW_AT_type + # DW_AT_external + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 28 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.4" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=137 +.Linfo_string2: + .asciz "gnu_tls_push" # string offset=146 +.Linfo_string3: + .asciz "x" # string offset=184 +.Linfo_string4: + .asciz "int" # string offset=186 +.Linfo_string5: + .asciz "main" # string 
offset=190 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 3 # Header: bucket count + .long 3 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 1 # Bucket 0 + .long 2 # Bucket 1 + .long 3 # Bucket 2 + .long 177693 # Hash in Bucket 0 + .long 2090499946 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 2 + .long .Linfo_string3 # String in Bucket 0: x + .long .Linfo_string5 # String in Bucket 1: main + .long .Linfo_string4 # String in Bucket 2: int + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 0 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 2 +.Lnames_abbrev_start0: + .byte 1 # Abbrev code + .byte 52 # DW_TAG_variable + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # 
End of abbrev + .byte 3 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames1: +.L2: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: x +.Lnames2: +.L0: + .byte 2 # Abbreviation code + .long 58 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames0: +.L1: + .byte 3 # Abbreviation code + .long 54 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 17.0.4 (https://git.internal.tfbnw.net/repos/git/rw/osmeta/external/llvm-project 8d1fd9f463cb31caf429b83cf7a5baea5f67e54a)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/icf-safe-icp.test b/bolt/test/X86/icf-safe-icp.test new file mode 100644 index 000000000000..a9227d311edc --- /dev/null +++ b/bolt/test/X86/icf-safe-icp.test @@ -0,0 +1,148 @@ +## Check that BOLT handles correctly folding functions with --icf=safe +## that can be referenced through a non control flow instruction when ICP optimization is enabled. +## This tests also checks that destructors are folded. 
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# ICFCHECK-NEXT: folding Derived3Func into Derived2Func + +# SAFEICFCHECK: skipping function with reference taken Derived3Func +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# SAFEICFCHECK-NEXT: ===--------- + + +## generate profile +## clang++ -O2 -fprofile-generate=. main.cpp -c -o mainProf.o +## PROF=test.profdata +## clang++ -m64 -fprofile-use=$PROF \ +## -mllvm -disable-icp=true -mllvm -print-after-all \ +## -g0 -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +## -fdebug-types-section \ +## main.cpp -c -o mainProfLTO.bc +## PASS='pgo-icall-prom' +## clang++ -m64 -fprofile-use=$PROF \ +## -O3 -Rpass=$PASS \ +## -mllvm -print-before=$PASS \ +## -mllvm -print-after=$PASS \ +## -mllvm -filter-print-funcs=main \ +## -mllvm -debug-only=$PASS \ +## -x ir \ +## mainProfLTO.bc -c -o mainProfFinal.o + +## class Base { +## public: +## virtual int func(int a, int b) const = 0; +## +## virtual ~Base() {}; +## }; +## +## class Derived2 : public Base { +## int c = 5; +## public: +## __attribute__((noinline)) int func(int a, int b)const override { return a * (a - b) + this->c; } +## +## ~Derived2() {} +## }; +## +## class Derived3 : public Base { +## int c = 500; +## public: +## __attribute__((noinline)) int func(int a, int b) const override { return a * (a - b) + this->c; } +## ~Derived3() {} +## }; +## +## __attribute__((noinline)) Base *createType(int a) { +## Base *base = nullptr; +## if (a == 4) +## 
base = new Derived2(); +## else +## base = new Derived3(); +## return base; +## } +## +## extern int returnFive(); +## extern int returnFourOrFive(int val); +## int main(int argc, char **argv) { +## int sum = 0; +## int a = returnFourOrFive(argc); +## int b = returnFive(); +## Base *ptr = createType(a); +## Base *ptr2 = createType(b); +## sum += ptr->func(b, a) + ptr2->func(b, a); +## return 0; +## } +## clang++ -c helper.cpp -o helper.o +## int FooVar = 1; +## int BarVar = 2; +## +## int fooGlobalFuncHelper(int a, int b) { +## return 5; +## } +## Manually modified to remove "extra" assembly. + .globl main + .type main,@function +main: + leaq Derived3Func(%rip), %rcx + callq Derived3Func + .size main, .-main + + .weak Derived2Func + .type Derived2Func,@function +Derived2Func: + imull %esi, %eax + retq + .size Derived2Func, .-Derived2Func + + .weak Derived2Destructor + .type Derived2Destructor,@function +Derived2Destructor: + jmp _ZdlPvm@PLT + .size Derived2Destructor, .-Derived2Destructor + + .weak Derived3Func + .type Derived3Func,@function +Derived3Func: + imull %esi, %eax + retq + .size Derived3Func, .-Derived3Func + + .weak _ZN4BaseD2Ev + .type _ZN4BaseD2Ev,@function +_ZN4BaseD2Ev: + retq + .size _ZN4BaseD2Ev, .-_ZN4BaseD2Ev + + .weak Derived3Destructor + .type Derived3Destructor,@function +Derived3Destructor: + jmp _ZdlPvm@PLT + .size Derived3Destructor, .-Derived3Destructor + + .type _ZTV8Derived2,@object + .section .data.rel.ro._ZTV8Derived2,"awG",@progbits,_ZTV8Derived2,comdat + .weak _ZTV8Derived2 +_ZTV8Derived2: + .quad 0 + .quad _ZTI8Derived2 + .quad Derived2Func + .quad _ZN4BaseD2Ev + .quad Derived2Destructor + .size _ZTV8Derived2, 40 + + .type _ZTV8Derived3,@object + .section .data.rel.ro._ZTV8Derived3,"awG",@progbits,_ZTV8Derived3,comdat + .weak _ZTV8Derived3 +_ZTV8Derived3: + .quad 0 + .quad _ZTI8Derived3 + .quad Derived3Func + .quad _ZN4BaseD2Ev + .quad Derived3Destructor + .size _ZTV8Derived3, 40 diff --git 
a/bolt/test/X86/icf-safe-process-rela-data.test b/bolt/test/X86/icf-safe-process-rela-data.test new file mode 100644 index 000000000000..cf71f5525777 --- /dev/null +++ b/bolt/test/X86/icf-safe-process-rela-data.test @@ -0,0 +1,64 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that are only referenced from a .rela.data section. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc + +# SAFEICFCHECK: skipping function with reference taken fooAddFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp +## Other functions removed for brevity. +## int main(int argc, char **argv) { +## const static int (*const funcGlobalBarAdd)(int, int) = barAddHdlper; +## const int (* const funcGlobalBarMul)(int, int) = fooGlobalFuncHelper; +## helper2(funcGlobalBarAdd, funcGlobalFooAdd, 3, 4) +## } +## Extra assembly removed. 
+ + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movq localStaticVarBarAdd, %rdi + movq localStaticVarFooAdd, %rsi + callq helperFunc + retq + .size main, .-main + + .type localStaticVarBarAdd,@object # @localStaticVarBarAdd + .data +localStaticVarBarAdd: + .quad barAddFunc + .size localStaticVarBarAdd, 8 + + .type localStaticVarFooAdd,@object # @localStaticVarFooAdd +localStaticVarFooAdd: + .quad fooAddFunc + .size localStaticVarFooAdd, 8 diff --git a/bolt/test/X86/icf-safe-test1-no-relocs.test b/bolt/test/X86/icf-safe-test1-no-relocs.test new file mode 100644 index 000000000000..b4e55a6d5504 --- /dev/null +++ b/bolt/test/X86/icf-safe-test1-no-relocs.test @@ -0,0 +1,20 @@ +## Check that BOLT reports an error for a binary with no relocations with the --icf=safe option. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe +# RUN: not llvm-bolt --no-threads %t.exe --icf=safe -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# SAFEICFCHECK: BOLT-ERROR: binary built without relocations. 
Safe ICF is not supported + +## int main(int argc, char **argv) { +## return temp; +## } + .globl main + .type main,@function +main: + .cfi_startproc + retq +.Lfunc_end8: + .size main, .-main + .cfi_endproc diff --git a/bolt/test/X86/icf-safe-test1.test b/bolt/test/X86/icf-safe-test1.test new file mode 100644 index 000000000000..8a8e5ccf38e7 --- /dev/null +++ b/bolt/test/X86/icf-safe-test1.test @@ -0,0 +1,98 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions. +## It invokes BOLT twice first testing CFG path, and second when functions have to be disassembled. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: --skip-funcs=helper1Func,main -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECKNOCFG %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barSubFunc into fooSubFunc + +# SAFEICFCHECK: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECK-NEXT: ===--------- + +# SAFEICFCHECKNOCFG: skipping function with reference taken barAddFunc +# SAFEICFCHECKNOCFG-NEXT: ICF iteration 1 +# SAFEICFCHECKNOCFG-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECKNOCFG-NEXT: ===--------- + +## clang++ -c main.cpp -o main.o +## extern int FooVar; +## extern int BarVar; +## [[clang::noinline]] +## int fooSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int 
barSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int fooAdd(int a, int b) { +## return a + b; +## } +## [[clang::noinline]] +## int barAdd(int a, int b) { +## return a + b; +## } +## int main(int argc, char **argv) { +## int temp = helper1(barAdd, FooVar, BarVar) + +## fooSub(FooVar, BarVar) + +## barSub(FooVar, BarVar) + fooAdd(FooVar, BarVar); +## return temp; +## } + .globl fooSubFunc + .type fooSubFunc,@function +fooSubFunc: + subl -8(%rbp), %eax + retq + .size fooSubFunc, .-fooSubFunc + + .globl barSubFunc + .type barSubFunc,@function +barSubFunc: + subl -8(%rbp), %eax + retq + .size barSubFunc, .-barSubFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helper1Func + .type helper1Func,@function +helper1Func: + leaq barAddFunc(%rip), %rax + cmpq %rax, -16(%rbp) + retq + .size helper1Func, .-helper1Func + + .globl main + .type main,@function +main: + leaq barAddFunc(%rip), %rdi + callq helper1Func + callq fooSubFunc + callq barSubFunc + callq fooAddFunc + retq + .size main, .-main diff --git a/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test new file mode 100644 index 000000000000..ea2d8a5f11e0 --- /dev/null +++ b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test @@ -0,0 +1,95 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions, +## when binary is built with -fno-PIC/-fno-PIE. 
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barMulFunc into fooMulFunc + +# SAFEICFCHECK: skipping function with reference taken fooMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp -c -o -fno-PIC +## Similar code gets generated for external reference function. +## Other functions removed for brevity. +## const static int (*const funcGlobalBarAdd)(int, int) = barAdd; +## const int (*const funcGlobalBarMul)(int, int) = barMul; +## int main(int argc, char **argv) { +## int temp = helper1(funcGlobalBarAdd, FooVar, BarVar) +## return temp; +## } +## Manually modified to remove "extra" assembly. 
+ .globl fooMulFunc + .type fooMulFunc,@function +fooMulFunc: + imull -8(%rbp), %eax + retq + .size fooMulFunc, .-fooMulFunc + + .globl barMulFunc + .type barMulFunc,@function +barMulFunc: + imull -8(%rbp), %eax + retq + .size barMulFunc, .-barMulFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + movabsq $barAddFunc, %rax + cmpq %rax, -16(%rbp) + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movl FooVar, %esi + movl BarVar, %edx + movabsq $barAddFunc, %rdi + callq helperFunc + movabsq $fooMulFunc, %rdi + movabsq $barMulFunc, %rsi + retq + .size main, .-main + + .type FooVar,@object + .data + .globl FooVar +FooVar: + .long 1 + .size FooVar, 4 + + .type BarVar,@object + .globl BarVar +BarVar: + .long 2 + .size BarVar, 4 + + .type .L.str,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "val: %d\n" + .size .L.str, 9 diff --git a/bolt/test/program-header.test b/bolt/test/program-header.test new file mode 100644 index 000000000000..4552303ea5af --- /dev/null +++ b/bolt/test/program-header.test @@ -0,0 +1,14 @@ +# Check that llvm-bolt does not add new segments when writing code in-place. 
+ +REQUIRES: system-linux + +RUN: %clang %cflags %p/Inputs/hello.c -o %t -no-pie -Wl,-q +RUN: llvm-bolt %t -o %t.bolt --use-old-text --align-functions=1 \ +RUN: --no-huge-pages --align-text=1 --use-gnu-stack \ +RUN: | FileCheck %s --check-prefix=CHECK-BOLT +RUN: llvm-readelf -WS %t.bolt | FileCheck %s + +CHECK-BOLT: rewriting .eh_frame_hdr in-place +CHECK-BOLT: not adding new segments + +CHECK-NOT: .bolt.org.eh_frame_hdr diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 88862ae9edb2..51706bdc2c20 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5703,6 +5703,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">, MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>; def pipe : Flag<["-", "--"], "pipe">, HelpText<"Use pipes between commands, when possible">; +// Facebook T92898286 +def post_link_optimize : Flag<["--"], "post-link-optimize">, + HelpText<"Apply post-link optimizations using BOLT">; +// End Facebook T92898286 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">; def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 8397f1121ec2..0db5858bbd0a 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -671,12 +671,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } + // Facebook T92898286 + if (Args.hasArg(options::OPT_post_link_optimize)) + CmdArgs.push_back("-q"); + // End Facebook T92898286 + Args.addAllArgs(CmdArgs, {options::OPT_T, options::OPT_t}); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); + // Facebook T92898286 + if (!Args.hasArg(options::OPT_post_link_optimize) || 
!Output.isFilename()) + return; + + const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv")); + ArgStringList MoveCmdArgs; + MoveCmdArgs.push_back(Output.getFilename()); + const char *PreBoltBin = + Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt"); + MoveCmdArgs.push_back(PreBoltBin); + C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + MvExec, MoveCmdArgs, std::nullopt)); + + ArgStringList BoltCmdArgs; + const char *BoltExec = + Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt")); + BoltCmdArgs.push_back(PreBoltBin); + BoltCmdArgs.push_back("-reorder-blocks=reverse"); + BoltCmdArgs.push_back("-update-debug-sections"); + BoltCmdArgs.push_back("-o"); + BoltCmdArgs.push_back(Output.getFilename()); + C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + BoltExec, BoltCmdArgs, std::nullopt)); + // End Facebook T92898286 } void tools::gnutools::Assembler::ConstructJob(Compilation &C, diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 9935fe6a199d..232d9811e00a 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -81,7 +81,13 @@ if is_msvc: # use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, "clang_src_dir"): config.clang_src_dir = "" -llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# Facebook T92898286 +should_test_bolt = get_required_attr(config, "llvm_test_bolt") +if should_test_bolt: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"]) +else: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# End Facebook T92898286 if not hasattr(config, "lld_src_dir"): config.lld_src_dir = "" @@ -294,3 +300,9 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf # Allow 'REQUIRES: XXX-registered-target' in tests. 
for arch in config.targets_to_build: config.available_features.add(arch.lower() + "-registered-target") + +# Facebook T92898286 +# Ensure the user's PYTHONPATH is included. +if "PYTHONPATH" in os.environ: + config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"] +# End Facebook T92898286 diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index 39458dfc79af..2d53cd377f03 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 06c685ebc3f5..9c2903504a18 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -268,6 +268,17 @@ if is_configured("lldb_libs_dir"): if is_configured("lldb_framework_dir"): dotest_cmd += ["--framework", config.lldb_framework_dir] +# Facebook T92898286 +if is_configured("llvm_test_bolt"): + dotest_cmd += ["-E", '"--post-link-optimize"'] +# End Facebook T92898286 + +if ( + "lldb-repro-capture" in config.available_features + or "lldb-repro-replay" in config.available_features +): + dotest_cmd += ["--skip-category=lldb-dap", "--skip-category=std-module"] + if "lldb-simulator-ios" in config.available_features: dotest_cmd += ["--apple-sdk", "iphonesimulator", "--platform-name", "ios-simulator"] elif "lldb-simulator-watchos" in config.available_features: diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index ecebc4477485..1c2ab4dbf083 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -1,5 +1,9 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + config.llvm_src_root = "@LLVM_SOURCE_DIR@" 
config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -44,6 +48,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + # Plugins lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@' if lldb_build_intel_pt == '1': diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 42968128f270..ac895e8d03ed 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -241,6 +241,11 @@ def use_support_substitutions(config): "-lc++", ] + # Facebook T92898286 + if config.llvm_test_bolt: + host_flags += ["--post-link-optimize"] + # End Facebook T92898286 + host_flags = " ".join(host_flags) config.substitutions.append(("%clang_host", "%clang " + host_flags)) config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags)) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index 31a6d68618b7..8b37d98a1efe 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -1,5 +1,10 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + + config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -36,6 +41,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import 
lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index f14065ab0379..d34ab73bbfcb 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -717,6 +717,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF) +# Facebook T92898286 +option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF) +# End Facebook T92898286 + # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")) |
