diff options
| author | Peter Collingbourne <peter@pcc.me.uk> | 2025-05-06 19:02:10 -0700 |
|---|---|---|
| committer | Peter Collingbourne <peter@pcc.me.uk> | 2025-05-06 19:02:10 -0700 |
| commit | f1b639c9cad45d505c406c4018aa4f33a6fe4815 (patch) | |
| tree | 1f5fcd61fdb8ebf8a5c92516366722202e7b309f | |
| parent | f1750300aad0e49383cd4b206e2354f1300a40a8 (diff) | |
[𝘀𝗽𝗿] changes to main this commit is based on users/pcc/spr/main.llvm-jitlink-fix-bug-in-target-address-computation
Created using spr 1.3.6-beta.1
[skip ci]
| -rw-r--r-- | lld/ELF/Arch/AArch64.cpp | 58 | ||||
| -rw-r--r-- | lld/ELF/Arch/TargetImpl.h | 87 | ||||
| -rw-r--r-- | lld/ELF/Arch/X86_64.cpp | 54 | ||||
| -rw-r--r-- | lld/ELF/Config.h | 1 | ||||
| -rw-r--r-- | lld/ELF/Driver.cpp | 2 | ||||
| -rw-r--r-- | lld/ELF/Options.td | 4 | ||||
| -rw-r--r-- | lld/ELF/Relocations.cpp | 8 | ||||
| -rw-r--r-- | lld/ELF/Target.h | 1 | ||||
| -rw-r--r-- | lld/docs/ld.lld.1 | 8 | ||||
| -rw-r--r-- | lld/test/ELF/aarch64-branch-to-branch.s | 61 | ||||
| -rw-r--r-- | lld/test/ELF/x86-64-branch-to-branch.s | 61 |
11 files changed, 341 insertions, 4 deletions
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9538dd4a70ba..f3a24bd8a918 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "TargetImpl.h" #include "lld/Common/ErrorHandler.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Endian.h" @@ -83,6 +84,7 @@ public: uint64_t val) const override; RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; + void applyBranchToBranchOpt() const override; private: void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; @@ -975,6 +977,62 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } +static std::optional<uint64_t> getControlTransferAddend(InputSection &is, + Relocation &r) { + // Identify a control transfer relocation for the branch-to-branch + // optimization. A "control transfer relocation" means a B or BL + // target but it also includes relative vtable relocations for example. + // + // We require the relocation type to be JUMP26, CALL26 or PLT32. With a + // relocation type of PLT32 the value may be assumed to be used for branching + // directly to the symbol and the addend is only used to produce the relocated + // value (hence the effective addend is always 0). This is because if a PLT is + // needed the addend will be added to the address of the PLT, and it doesn't + // make sense to branch into the middle of a PLT. For example, relative vtable + // relocations use PLT32 and 0 or a positive value as the addend but still are + // used to branch to the symbol. + // + // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero + // addend is that we are branching to symbol+addend so that becomes the + // effective addend. 
+ if (r.type == R_AARCH64_PLT32) + return 0; + if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26) + return r.addend; + return std::nullopt; +} + +static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is, + uint64_t offset) { + auto *i = std::lower_bound( + is.relocations.begin(), is.relocations.end(), offset, + [](Relocation &r, uint64_t offset) { return r.offset < offset; }); + if (i != is.relocations.end() && i->offset == offset && + i->type == R_AARCH64_JUMP26) { + return {i, i->addend}; + } + return {nullptr, 0}; +} + +static void mergeControlTransferRelocations(Relocation &r1, + const Relocation &r2) { + r1.expr = r2.expr; + r1.sym = r2.sym; + // With PLT32 we must respect the original addend as that affects the value's + // interpretation. With the other relocation types the original addend is + // irrelevant because it referred to an offset within the original target + // section so we overwrite it. + if (r1.type == R_AARCH64_PLT32) + r1.addend += r2.addend; + else + r1.addend = r2.addend; +} + +void AArch64::applyBranchToBranchOpt() const { + applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend, + mergeControlTransferRelocations); +} + // AArch64 may use security features in variant PLT sequences. These are: // Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target // Indicator (BTI) introduced in armv8.5-a. The additional instructions used diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h new file mode 100644 index 000000000000..757c0e2c0c51 --- /dev/null +++ b/lld/ELF/Arch/TargetImpl.h @@ -0,0 +1,87 @@ +//===- TargetImpl.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ARCH_TARGETIMPL_H +#define LLD_ELF_ARCH_TARGETIMPL_H + +#include "InputFiles.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Symbols.h" +#include "llvm/BinaryFormat/ELF.h" + +namespace lld { +namespace elf { + +// getControlTransferAddend: If this relocation is used for control transfer +// instructions (e.g. branch, branch-link or call) or code references (e.g. +// virtual function pointers) and indicates an address-insignificant reference, +// return the effective addend for the relocation, otherwise return +// std::nullopt. The effective addend for a relocation is the addend that is +// used to determine its branch destination. +// +// getBranchInfo: If a control transfer relocation referring to is+offset +// directly transfers control to a relocated branch instruction in the specified +// section, return the relocation for the branch target as well as its effective +// addend (see above). Otherwise return {nullptr, 0}. +// +// mergeControlTransferRelocations: Given r1, a relocation for which +// getControlTransferAddend() returned a value, and r2, a relocation returned by +// getBranchInfo(), modify r1 so that it branches directly to the target of r2. +template <typename GetBranchInfo, typename GetControlTransferAddend, + typename MergeControlTransferRelocations> +inline void applyBranchToBranchOptImpl( + Ctx &ctx, GetBranchInfo getBranchInfo, + GetControlTransferAddend getControlTransferAddend, + MergeControlTransferRelocations mergeControlTransferRelocations) { + // Needs to run serially because it writes to the relocations array as well as + // reading relocations of other sections. 
+ for (ELFFileBase *f : ctx.objectFiles) { + auto getRelocBranchInfo = + [&getBranchInfo](Relocation &r, + uint64_t addend) -> std::pair<Relocation *, uint64_t> { + auto *target = dyn_cast_or_null<Defined>(r.sym); + // We don't allow preemptible symbols (may go somewhere else), + // absolute symbols (runtime behavior unknown), non-executable memory + // (ditto) or non-regular sections (no section data). + if (!target || target->isPreemptible || !target->section || + !(target->section->flags & llvm::ELF::SHF_EXECINSTR) || + target->section->kind() != SectionBase::Regular) + return {nullptr, 0}; + return getBranchInfo(*cast<InputSection>(target->section), + target->value + addend); + }; + for (InputSectionBase *s : f->getSections()) { + if (!s) + continue; + for (Relocation &r : s->relocations) { + if (std::optional<uint64_t> addend = + getControlTransferAddend(*cast<InputSection>(s), r)) { + std::pair<Relocation *, uint64_t> targetAndAddend = + getRelocBranchInfo(r, *addend); + if (targetAndAddend.first) { + while (1) { + std::pair<Relocation *, uint64_t> nextTargetAndAddend = + getRelocBranchInfo(*targetAndAddend.first, + targetAndAddend.second); + if (!nextTargetAndAddend.first) + break; + targetAndAddend = nextTargetAndAddend; + } + mergeControlTransferRelocations(r, *targetAndAddend.first); + } + } + } + } + } +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 0c4fd00cab65..0a4578b0aca4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "TargetImpl.h" #include "lld/Common/ErrorHandler.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Endian.h" @@ -50,6 +51,7 @@ public: bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, InputSection *nextIS) const override; bool relaxOnce(int pass) const override; + void applyBranchToBranchOpt() const override; private: 
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; @@ -1162,6 +1164,58 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } +static std::optional<uint64_t> getControlTransferAddend(InputSection &is, + Relocation &r) { + // Identify a control transfer relocation for the branch-to-branch + // optimization. A "control transfer relocation" usually means a CALL or JMP + // target but it also includes relative vtable relocations for example. + // + // We require the relocation type to be PLT32. With a relocation type of PLT32 + // the value may be assumed to be used for branching directly to the symbol + // and the addend is only used to produce the relocated value (hence the + // effective addend is always 0). This is because if a PLT is needed the + // addend will be added to the address of the PLT, and it doesn't make sense + // to branch into the middle of a PLT. For example, relative vtable + // relocations use PLT32 and 0 or a positive value as the addend but still are + // used to branch to the symbol. + if (r.type == R_X86_64_PLT32) + return 0; + return std::nullopt; +} + +static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is, + uint64_t offset) { + auto content = is.contentMaybeDecompress(); + if (content.size() > offset && content[offset] == 0xe9) { // JMP immediate + auto *i = std::lower_bound( + is.relocations.begin(), is.relocations.end(), offset + 1, + [](Relocation &r, uint64_t offset) { return r.offset < offset; }); + // Unlike with getControlTransferAddend() it is valid to accept a PC32 + // relocation here because we know that this is actually a JMP and not some + // other reference, so the interpretation is that we add 4 to the addend and + // use that as the effective addend. 
+ if (i != is.relocations.end() && i->offset == offset + 1 && + (i->type == R_X86_64_PC32 || i->type == R_X86_64_PLT32)) { + return {i, i->addend + 4}; + } + } + return {nullptr, 0}; +} + +static void mergeControlTransferRelocations(Relocation &r1, + const Relocation &r2) { + r1.expr = r2.expr; + r1.sym = r2.sym; + // The +4 is here to compensate for r2.addend which will likely be -4, + // but may also be addend-4 in case of a PC32 branch to symbol+addend. + r1.addend += r2.addend + 4; +} + +void X86_64::applyBranchToBranchOpt() const { + applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend, + mergeControlTransferRelocations); +} + // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT // entries containing endbr64 instructions. A PLT entry will be split into two // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f0e9592d85dd..b7449b9d13cf 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -276,6 +276,7 @@ struct Config { bool bpFunctionOrderForCompression = false; bool bpDataOrderForCompression = false; bool bpVerboseSectionOrderer = false; + bool branchToBranch = false; bool checkSections; bool checkDynamicRelocs; std::optional<llvm::DebugCompressionType> compressDebugSections; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9d36071e1532..e79372957e40 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1589,6 +1589,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.zWxneeded = hasZOption(args, "wxneeded"); setUnresolvedSymbolPolicy(ctx, args); ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no"; + ctx.arg.branchToBranch = args.hasFlag( + OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2); if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) { if (arg->getOption().matches(OPT_eb)) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 
76d28096f82c..2ce9e07dc6f2 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -59,6 +59,10 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">, MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">; def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">; +defm branch_to_branch: BB<"branch-to-branch", + "Enable branch-to-branch optimization (default at -O2)", + "Disable branch-to-branch optimization (default at -O0 and -O1)">; + defm check_sections: B<"check-sections", "Check section addresses for overlaps (default)", "Do not check section addresses for overlaps">; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 277acb26987b..457fd19da549 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1671,9 +1671,10 @@ void RelocationScanner::scan(Relocs<RelTy> rels) { } // Sort relocations by offset for more efficient searching for - // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. + // R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization. 
if (ctx.arg.emachine == EM_RISCV || - (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc")) + (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") || + ctx.arg.branchToBranch) llvm::stable_sort(sec->relocs(), [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; @@ -1964,6 +1965,9 @@ void elf::postScanRelocations(Ctx &ctx) { for (ELFFileBase *file : ctx.objectFiles) for (Symbol *sym : file->getLocalSymbols()) fn(*sym); + + if (ctx.arg.branchToBranch) + ctx.target->applyBranchToBranchOpt(); } static bool mergeCmp(const InputSection *a, const InputSection *b) { diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index fd1e5d33c438..6dd20b2f0cba 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -101,6 +101,7 @@ public: virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} + virtual void applyBranchToBranchOpt() const {} virtual ~TargetInfo(); diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 7b2650637cb1..d7b987ded784 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -93,6 +93,10 @@ Bind default visibility defined STB_GLOBAL function symbols locally for .Fl shared. .It Fl --be8 Write a Big Endian ELF File using BE8 format(AArch32 only) +.It Fl -branch-to-branch +Enable the branch-to-branch optimizations: a branch whose target is +another branch instruction is rewritten to point to the latter branch +target (AArch64 and X86_64 only). Enabled by default at -O2. .It Fl -build-id Ns = Ns Ar value Generate a build ID note. .Ar value @@ -414,7 +418,7 @@ If not specified, .Dv a.out is used as a default. .It Fl O Ns Ar value -Optimize output file size. +Optimize output file. .Ar value may be: .Pp @@ -424,7 +428,7 @@ Disable string merging. .It Cm 1 Enable string merging. .It Cm 2 -Enable string tail merging. +Enable string tail merging and branch-to-branch optimization. 
.El .Pp .Fl O Ns Cm 1 diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s new file mode 100644 index 000000000000..06c899fd4e6b --- /dev/null +++ b/lld/test/ELF/aarch64-branch-to-branch.s @@ -0,0 +1,61 @@ +# REQUIRES: aarch64 + +## Test that the branch-to-branch optimization follows the links +## from f1 -> f2 -> f3 and updates all references to point to f3. + +# RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t --branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s +# RUN: ld.lld %t.o -o %t -O2 +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s + +## Test that branch-to-branch is disabled by default. + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s +# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s + +## Test that branch-to-branch is disabled for preemptible symbols. 
+ +# RUN: ld.lld %t.o -o %t --branch-to-branch -shared +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s + +.section .rodata.vtable,"a" +.globl vtable +vtable: +# B2B: Contents of section .rodata: +# B2B-NEXT: [[VF:[0-9a-f]{8}]] +.4byte f1@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f2@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f3@PLT - vtable + +.section .text._start,"ax" +.globl _start +# CHECK: <_start>: +_start: +# B2B: bl {{.*}} <f3> +# NOB2B: bl {{.*}} <f1{{.*}}> +bl f1 +# B2B: b {{.*}} <f3> +# NOB2B: b {{.*}} <f2{{.*}}> +b f2 + +.section .text.f1,"ax" +.globl f1 +f1: +b f2 + +.section .text.f2,"ax" +.globl f2 +# CHECK: <f2>: +f2: +# CHECK-NEXT: b {{.*}} <f3{{.*}}> +b f3 + +.section .text.f3,"ax" +.globl f3 +f3: +ret diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s new file mode 100644 index 000000000000..0c9e903438f8 --- /dev/null +++ b/lld/test/ELF/x86-64-branch-to-branch.s @@ -0,0 +1,61 @@ +# REQUIRES: x86 + +## Test that the branch-to-branch optimization follows the links +## from f1 -> f2 -> f3 and updates all references to point to f3. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t --branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s +# RUN: ld.lld %t.o -o %t -O2 +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s + +## Test that branch-to-branch is disabled by default. + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s +# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s + +## Test that branch-to-branch is disabled for preemptible symbols. 
+ +# RUN: ld.lld %t.o -o %t --branch-to-branch -shared +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s + +.section .rodata.vtable,"a" +.globl vtable +vtable: +# B2B: Contents of section .rodata: +# B2B-NEXT: [[VF:[0-9a-f]{8}]] +.4byte f1@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f2@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f3@PLT - vtable + +.section .text._start,"ax" +.globl _start +# CHECK: <_start>: +_start: +# B2B-NEXT: jmp {{.*}} <f3> +# NOB2B-NEXT: jmp {{.*}} <f1{{.*}}> +jmp f1 +# B2B-NEXT: jmp {{.*}} <f3> +# NOB2B-NEXT: jmp {{.*}} <f2{{.*}}> +jmp f2 + +.section .text.f1,"ax" +.globl f1 +f1: +jmp f2 + +.section .text.f2,"ax" +.globl f2 +# CHECK: <f2>: +f2: +# CHECK-NEXT: jmp {{.*}} <f3{{.*}}> +jmp f3 + +.section .text.f3,"ax" +.globl f3 +f3: +ret |
