summary | refs | log | tree | commit | diff
path: root/bolt/lib
diff options
context:
space:
mode:
Diffstat (limited to 'bolt/lib')
-rw-r--r-- bolt/lib/Core/BinaryContext.cpp 19
-rw-r--r-- bolt/lib/Core/BinaryEmitter.cpp 8
-rw-r--r-- bolt/lib/Core/BinaryFunction.cpp 12
-rw-r--r-- bolt/lib/Core/DIEBuilder.cpp 22
-rw-r--r-- bolt/lib/Core/DebugNames.cpp 7
-rw-r--r-- bolt/lib/Core/FunctionLayout.cpp 4
-rw-r--r-- bolt/lib/Core/GDBIndex.cpp 7
-rw-r--r-- bolt/lib/Passes/ADRRelaxationPass.cpp 1
-rw-r--r-- bolt/lib/Passes/BinaryPasses.cpp 2
-rw-r--r-- bolt/lib/Passes/CMakeLists.txt 1
-rw-r--r-- bolt/lib/Passes/ContinuityStats.cpp 250
-rw-r--r-- bolt/lib/Passes/IdenticalCodeFolding.cpp 12
-rw-r--r-- bolt/lib/Passes/LongJmp.cpp 440
-rw-r--r-- bolt/lib/Passes/PatchEntries.cpp 15
-rw-r--r-- bolt/lib/Passes/VeneerElimination.cpp 43
-rw-r--r-- bolt/lib/Profile/BoltAddressTranslation.cpp 22
-rw-r--r-- bolt/lib/Profile/YAMLProfileReader.cpp 6
-rw-r--r-- bolt/lib/Rewrite/BinaryPassManager.cpp 9
-rw-r--r-- bolt/lib/Rewrite/DWARFRewriter.cpp 36
-rw-r--r-- bolt/lib/Rewrite/PseudoProbeRewriter.cpp 20
-rw-r--r-- bolt/lib/Rewrite/RewriteInstance.cpp 42
21 files changed, 310 insertions, 668 deletions
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index ba2de6ce2b28..1347047e1b70 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1609,18 +1609,10 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
llvm::stable_sort(SortedFunctions,
[](const BinaryFunction *A, const BinaryFunction *B) {
- // Place hot text movers at the start.
- if (A->isHotTextMover() && !B->isHotTextMover())
- return true;
- if (!A->isHotTextMover() && B->isHotTextMover())
- return false;
if (A->hasValidIndex() && B->hasValidIndex()) {
return A->getIndex() < B->getIndex();
}
- if (opts::HotFunctionsAtEnd)
- return B->hasValidIndex();
- else
- return A->hasValidIndex();
+ return A->hasValidIndex();
});
return SortedFunctions;
}
@@ -2362,15 +2354,6 @@ BinaryContext::createInjectedBinaryFunction(const std::string &Name,
return BF;
}
-BinaryFunction *
-BinaryContext::createThunkBinaryFunction(const std::string &Name) {
- ThunkBinaryFunctions.push_back(new BinaryFunction(Name, *this, true));
- BinaryFunction *BF = ThunkBinaryFunctions.back();
- setSymbolToFunctionMap(BF->getSymbol(), BF);
- BF->CurrentState = BinaryFunction::State::CFG;
- return BF;
-}
-
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
// Adjust branch instruction to match the current layout.
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 89043db03102..f6dfa249f9a9 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -258,14 +258,6 @@ void BinaryEmitter::emitFunctions() {
if (Emitted)
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
-
- // Emit thunks.
- if (BC.getThunkLocation() != Function)
- continue;
-
- for (BinaryFunction *Thunk : BC.getThunkBinaryFunctions()) {
- emitFunction(*Thunk, Thunk->getLayout().getMainFragment());
- }
}
};
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 27c8ccefedee..36c42fced93d 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -111,10 +111,6 @@ cl::opt<bool>
cl::desc("try to preserve basic block alignment"),
cl::cat(BoltOptCategory));
-static cl::opt<bool> PrintOffsets("print-offsets",
- cl::desc("print basic block offsets"),
- cl::Hidden, cl::cat(BoltOptCategory));
-
static cl::opt<bool> PrintOutputAddressRange(
"print-output-address-range",
cl::desc(
@@ -545,11 +541,6 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
if (BB->isLandingPad())
OS << " Landing Pad\n";
- if (opts::PrintOffsets && BB->getOutputStartAddress()) {
- OS << " OutputOffset: 0x"
- << Twine::utohexstr(BB->getOutputStartAddress()) << '\n';
- }
-
uint64_t BBExecCount = BB->getExecutionCount();
if (hasValidProfile()) {
OS << " Exec Count : ";
@@ -4562,9 +4553,6 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
}
bool BinaryFunction::isAArch64Veneer() const {
- if (hasNameRegex("__AArch64.*Thunk.*"))
- return true;
-
if (empty() || hasIslandsInfo())
return false;
diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp
index fa79d60ac01e..69cfd58a1df0 100644
--- a/bolt/lib/Core/DIEBuilder.cpp
+++ b/bolt/lib/Core/DIEBuilder.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/Timer.h"
#include <algorithm>
#include <cstdint>
@@ -39,8 +38,7 @@
#define DEBUG_TYPE "bolt"
namespace opts {
extern cl::opt<unsigned> Verbosity;
-extern cl::opt<bool> TimeDebug;
-} // namespace opts
+}
namespace llvm {
namespace bolt {
@@ -351,8 +349,6 @@ void DIEBuilder::buildCompileUnits(const bool Init) {
}
}
void DIEBuilder::buildCompileUnits(const std::vector<DWARFUnit *> &CUs) {
- NamedRegionTimer T("buildcompileunits", "build compile units", "debug",
- "update debug info", opts::TimeDebug);
BuilderState.reset(new State());
// Allocating enough for current batch being processed.
// In real use cases we either processing a batch of CUs with no cross
@@ -560,8 +556,6 @@ void DIEBuilder::populateDebugNamesTable(
}
void DIEBuilder::updateDebugNamesTable() {
- NamedRegionTimer T("updatedebugnames", "update debug_names table",
- "debug", "update debug info", opts::TimeDebug);
auto finalizeDebugNamesTableForCU = [&](DWARFUnit &CU,
uint64_t &UnitStartOffset) -> void {
DIE *UnitDIE = getUnitDIEbyUnit(CU);
@@ -572,14 +566,18 @@ void DIEBuilder::updateDebugNamesTable() {
UnitStartOffset += CurUnitInfo.UnitLength;
};
- auto It = llvm::partition_point(getState().DUList, [](DWARFUnit *CU) {
- return CU->getVersion() < 5 && CU->isTypeUnit();
- });
uint64_t TypeUnitStartOffset = 0;
- for (DWARFUnit *CU : llvm::make_range(getState().DUList.begin(), It))
+ for (DWARFUnit *CU : getState().DUList) {
+ if (!(CU->getVersion() < 5 && CU->isTypeUnit()))
+ break;
finalizeDebugNamesTableForCU(*CU, TypeUnitStartOffset);
- for (DWARFUnit *CU : llvm::make_range(It, getState().DUList.end()))
+ }
+
+ for (DWARFUnit *CU : getState().DUList) {
+ if (CU->getVersion() < 5 && CU->isTypeUnit())
+ continue;
finalizeDebugNamesTableForCU(*CU, DebugNamesUnitSize);
+ }
updateReferences();
}
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp
index d014c2c2fbcf..640b29ec36d5 100644
--- a/bolt/lib/Core/DebugNames.cpp
+++ b/bolt/lib/Core/DebugNames.cpp
@@ -10,16 +10,11 @@
#include "bolt/Core/BinaryContext.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/Timer.h"
#include <cstdint>
#include <optional>
-namespace opts {
-extern llvm::cl::opt<bool> TimeDebug;
-} // namespace opts
namespace llvm {
namespace bolt {
DWARF5AcceleratorTable::DWARF5AcceleratorTable(
@@ -745,8 +740,6 @@ void DWARF5AcceleratorTable::emitAugmentationString() const {
void DWARF5AcceleratorTable::emitAccelTable() {
if (!NeedToCreate)
return;
- NamedRegionTimer T("emitAccelTable", "Emit Accelerator Table",
- "debug", "Update Debug Info", opts::TimeDebug);
finalize();
populateAbbrevsMap();
writeEntries();
diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp
index 4498fc44da95..15e6127ad2e9 100644
--- a/bolt/lib/Core/FunctionLayout.cpp
+++ b/bolt/lib/Core/FunctionLayout.cpp
@@ -33,9 +33,7 @@ FunctionFragment::const_iterator FunctionFragment::end() const {
return const_iterator(Layout->block_begin() + StartIndex + Size);
}
-BinaryBasicBlock *FunctionFragment::front() const { return *begin(); }
-
-BinaryBasicBlock *FunctionFragment::back() const { return *std::prev(end()); }
+const BinaryBasicBlock *FunctionFragment::front() const { return *begin(); }
FunctionLayout::FunctionLayout() { addFragment(); }
diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index a3aecf4651bc..c7fb4889646b 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -7,12 +7,7 @@
//===----------------------------------------------------------------------===//
#include "bolt/Core/GDBIndex.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Timer.h"
-namespace opts {
-extern llvm::cl::opt<bool> TimeDebug;
-} // namespace opts
using namespace llvm::bolt;
using namespace llvm::support::endian;
@@ -28,8 +23,6 @@ void GDBIndex::updateGdbIndexSection(
DebugARangesSectionWriter &ARangesSectionWriter) {
if (!BC.getGdbIndexSection())
return;
- NamedRegionTimer T("updateGdbIndex", "Update gdb_index Section",
- "debug", "Update Debug Info", opts::TimeDebug);
// See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
// for .gdb_index section format.
diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/ADRRelaxationPass.cpp
index a7f99b6bb547..52811edcb827 100644
--- a/bolt/lib/Passes/ADRRelaxationPass.cpp
+++ b/bolt/lib/Passes/ADRRelaxationPass.cpp
@@ -63,6 +63,7 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
if (TargetBF == &BF && !BB.isSplit())
continue;
+
// No relaxation needed if ADR references a basic block in the same
// fragment.
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index ca69667f9c85..fa95ad7324ac 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1264,8 +1264,6 @@ Error AssignSections::runOnFunctions(BinaryContext &BC) {
if (opts::isHotTextMover(Function)) {
Function.setCodeSectionName(BC.getHotTextMoverSectionName());
Function.setColdCodeSectionName(BC.getHotTextMoverSectionName());
- // TODO: find a better place to mark a function as a mover.
- Function.setHotTextMover(true);
continue;
}
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 407d8b03f739..1c1273b3d242 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
PatchEntries.cpp
PettisAndHansen.cpp
PLTCall.cpp
+ ContinuityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
ReorderAlgorithm.cpp
diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp
new file mode 100644
index 000000000000..b32365b59065
--- /dev/null
+++ b/bolt/lib/Passes/ContinuityStats.cpp
@@ -0,0 +1,250 @@
+//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the continuity stats calculation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/ContinuityStats.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+
+#define DEBUG_TYPE "bolt-opts"
+
+using namespace llvm;
+using namespace bolt;
+
+// Command-line options consulted by the continuity stats pass.
+namespace opts {
+// Declared here, defined in another translation unit. In this file, levels
+// >= 1 and >= 2 enable progressively more detailed output.
+extern cl::opt<unsigned> Verbosity;
+// Upper bound on how many of the hottest functions are analyzed (defaults to
+// 1000). Setting it to 0 makes the pass return without printing anything.
+cl::opt<unsigned> NumFunctionsForContinuityCheck(
+ "num-functions-for-continuity-check",
+ cl::desc("number of hottest functions to print aggregated "
+ "CFG discontinuity stats of."),
+ cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+} // namespace opts
+
+namespace {
+// List of (non-owning, read-only) functions the statistics are computed over,
+// and the iterator type used to hand sub-ranges of that list to the printers.
+using FunctionListType = std::vector<const BinaryFunction *>;
+using function_iterator = FunctionListType::iterator;
+
+/// Prints selected order statistics of \p values to \p OS, one per line:
+/// MAX, TOP 1%, 5%, 10%, 20%, 50%, 80% and MIN.
+///
+/// \p values is sorted in place (the caller's vector is reordered). Nothing is
+/// printed when \p values is empty. When \p Fraction is true each value is
+/// multiplied by 100 and printed as a percentage with two decimals; otherwise
+/// the value is streamed as-is.
+template <typename T>
+void printDistribution(raw_ostream &OS, std::vector<T> &values,
+ bool Fraction = false) {
+ if (values.empty())
+ return;
+ // Sort values in ascending order, then print the MAX, TOP 1%, 5%, 10%,
+ // 20%, 50%, 80%, MIN by indexing from the high end of the sorted vector.
+ // If Fraction is true, then values are printed as percentages instead of
+ // raw numbers.
+ std::sort(values.begin(), values.end());
+
+ // Prints one "<Text>: <value>" line for the element at the given top
+ // percentile. Text is padded to 9 columns, so it must not exceed 9 chars.
+ auto printLine = [&](std::string Text, double Percent) {
+ int Rank = int(values.size() * (1.0 - Percent / 100));
+ // Percent == 0 yields Rank == values.size(), one past the end; use the
+ // last (largest) element instead.
+ if (Percent == 0)
+ Rank = values.size() - 1;
+ if (Fraction)
+ OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
+ << format("%.2lf%%", values[Rank] * 100) << "\n";
+ else
+ OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
+ << values[Rank] << "\n";
+ };
+
+ printLine("MAX", 0);
+ const int percentages[] = {1, 5, 10, 20, 50, 80};
+ for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
+ printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
+ }
+ // Percent == 100 maps to Rank 0, i.e. the smallest element.
+ printLine("MIN", 100);
+}
+
+/// Prints CFG continuity statistics for \p Functions to \p OS.
+///
+/// Given a perfect profile, every positive-execution-count BB should be
+/// connected to an entry of the function through a positive-execution-count
+/// directed path in the control flow graph. For every function with more than
+/// one basic block this measures how far the profile is from that ideal:
+///   - the number of positive-count BBs not reachable that way,
+///   - the sum of their execution counts (ECs), and
+///   - that sum as a fraction of the function's total BB execution count.
+///
+/// The 95th-percentile fraction is always printed; the full distributions are
+/// printed when opts::Verbosity >= 1. Nothing is printed if no function in
+/// \p Functions has more than one basic block.
+void printCFGContinuityStats(raw_ostream &OS,
+                             iterator_range<function_iterator> &Functions) {
+  std::vector<size_t> NumUnreachables;
+  std::vector<size_t> SumECUnreachables;
+  std::vector<double> FractionECUnreachables;
+
+  for (const BinaryFunction *Function : Functions) {
+    // Single-block functions have no edges to check.
+    if (Function->size() <= 1)
+      continue;
+
+    // Compute the sum of all BB execution counts (ECs).
+    size_t NumPosECBBs = 0;
+    size_t SumAllBBEC = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      const size_t BBEC = BB.getKnownExecutionCount();
+      NumPosECBBs += BBEC > 0 ? 1 : 0;
+      SumAllBBEC += BBEC;
+    }
+
+    // Perform BFS on subgraph of CFG induced by positive weight edges.
+    // Compute the number of BBs reachable from the entry(s) of the function and
+    // the sum of their execution counts (ECs).
+    std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
+    std::unordered_set<unsigned> Visited;
+    std::queue<unsigned> Queue;
+    for (const BinaryBasicBlock &BB : *Function) {
+      // Make sure BB.getIndex() is not already in IndexToBB.
+      assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
+      IndexToBB[BB.getIndex()] = &BB;
+      // Only entry points that were actually executed seed the search.
+      if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
+        Queue.push(BB.getIndex());
+        Visited.insert(BB.getIndex());
+      }
+    }
+    while (!Queue.empty()) {
+      const unsigned BBIndex = Queue.front();
+      const BinaryBasicBlock *BB = IndexToBB[BBIndex];
+      Queue.pop();
+      // Branch info entries are walked in lock-step with the successor list,
+      // so the iterator is advanced exactly once per successor.
+      auto SuccBIIter = BB->branch_info_begin();
+      for (const BinaryBasicBlock *Succ : BB->successors()) {
+        const uint64_t Count = SuccBIIter->Count;
+        ++SuccBIIter;
+        // Ignore edges that were never taken or have no profile.
+        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
+          continue;
+        if (Visited.insert(Succ->getIndex()).second)
+          Queue.push(Succ->getIndex());
+      }
+    }
+
+    // Loop through Visited, and sum the corresponding BBs' execution counts
+    // (ECs). Only positive-count BBs are counted as reachable: an inconsistent
+    // profile can reach a zero-count BB through a positive-count edge, and
+    // counting it would make the subtraction below wrap around.
+    size_t NumReachablePosECBBs = 0;
+    size_t SumReachableBBEC = 0;
+    for (const unsigned BBIndex : Visited) {
+      const size_t BBEC = IndexToBB[BBIndex]->getKnownExecutionCount();
+      NumReachablePosECBBs += BBEC > 0 ? 1 : 0;
+      SumReachableBBEC += BBEC;
+    }
+
+    const size_t NumPosECBBsUnreachableFromEntry =
+        NumPosECBBs - NumReachablePosECBBs;
+    const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
+    // A function whose blocks all have zero count has nothing unreachable;
+    // report 0 instead of 0/0 (NaN), which would also break the sorts below.
+    const double FractionECUnreachable =
+        SumAllBBEC == 0
+            ? 0.0
+            : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;
+
+    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
+      OS << "Non-trivial CFG discontinuity observed in function "
+         << Function->getPrintName() << "\n";
+      LLVM_DEBUG(Function->dump());
+    }
+
+    NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
+    SumECUnreachables.push_back(SumUnreachableBBEC);
+    FractionECUnreachables.push_back(FractionECUnreachable);
+  }
+
+  if (FractionECUnreachables.empty())
+    return;
+
+  // Ascending sort; index size * 0.95 is the value at the top-5% boundary and
+  // is always in range because the vector is non-empty.
+  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
+  const int Rank = int(FractionECUnreachables.size() * 0.95);
+  OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
+               FractionECUnreachables[Rank] * 100);
+
+  if (opts::Verbosity >= 1) {
+    OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
+       << "distribution of NUM(unreachable POS BBs) among all focal "
+          "functions\n";
+    printDistribution(OS, NumUnreachables);
+
+    OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
+          "functions\n";
+    printDistribution(OS, SumECUnreachables);
+
+    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
+          "POS BBs))] among all focal functions\n";
+    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
+  }
+}
+
+/// Prints continuity statistics for the hottest functions in
+/// \p ValidFunctions to BC.outs().
+///
+/// \p ValidFunctions is sorted in place by descending known execution count.
+/// At most \p NumTopFunctions functions (fewer if the list is shorter) are
+/// analyzed as a whole. When opts::Verbosity >= 1 and at least 5 functions
+/// are analyzed, the same statistics are printed again for 5 equally sized
+/// buckets of decreasing hotness.
+void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
+ size_t NumTopFunctions) {
+ // Sort the list of functions by execution counts (reverse).
+ llvm::sort(ValidFunctions,
+ [&](const BinaryFunction *A, const BinaryFunction *B) {
+ return A->getKnownExecutionCount() > B->getKnownExecutionCount();
+ });
+
+ // Never look past the end of the list.
+ const size_t RealNumTopFunctions =
+ std::min(NumTopFunctions, ValidFunctions.size());
+
+ iterator_range<function_iterator> Functions(
+ ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
+
+ // This prefix has no trailing newline: the first line emitted by
+ // printCFGContinuityStats ("top 5% function CFG discontinuity is ...")
+ // completes the sentence.
+ BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
+ RealNumTopFunctions);
+ printCFGContinuityStats(BC.outs(), Functions);
+
+ // Print more detailed bucketed stats if requested.
+ if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
+ // Integer division: when RealNumTopFunctions is not a multiple of 5, the
+ // last (RealNumTopFunctions % 5) functions fall outside every bucket.
+ const size_t PerBucketSize = RealNumTopFunctions / 5;
+ BC.outs() << format(
+ "Detailed stats for 5 buckets, each with %zu functions:\n",
+ PerBucketSize);
+
+ // For each bucket, print the CFG continuity stats of the functions in the
+ // bucket.
+ for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
+ const size_t StartIndex = BucketIndex * PerBucketSize;
+ const size_t EndIndex = StartIndex + PerBucketSize;
+ iterator_range<function_iterator> Functions(
+ ValidFunctions.begin() + StartIndex,
+ ValidFunctions.begin() + EndIndex);
+ // The list is sorted hottest-first, so the first element of the bucket
+ // has the largest count and the last element the smallest.
+ const size_t MaxFunctionExecutionCount =
+ ValidFunctions[StartIndex]->getKnownExecutionCount();
+ const size_t MinFunctionExecutionCount =
+ ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
+ BC.outs() << format("----------------\n| Bucket %zu: "
+ "|\n----------------\n",
+ BucketIndex + 1)
+ << format(
+ "execution counts of the %zu functions in the bucket: "
+ "%zu-%zu\n",
+ EndIndex - StartIndex, MinFunctionExecutionCount,
+ MaxFunctionExecutionCount);
+ printCFGContinuityStats(BC.outs(), Functions);
+ }
+ }
+}
+} // namespace
+
+/// Returns true if \p BF should be included in the continuity statistics.
+/// Functions that are empty or have no valid profile are rejected; everything
+/// else is decided by BinaryFunctionPass::shouldOptimize.
+bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
+ if (BF.empty() || !BF.hasValidProfile())
+ return false;
+
+ return BinaryFunctionPass::shouldOptimize(BF);
+}
+
+/// Pass entry point: collects every function in \p BC accepted by
+/// shouldOptimize and prints continuity statistics for the hottest
+/// opts::NumFunctionsForContinuityCheck of them. Always returns success.
+Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
+ // Create a list of functions with valid profiles.
+ FunctionListType ValidFunctions;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (PrintContinuityStats::shouldOptimize(*Function))
+ ValidFunctions.push_back(Function);
+ }
+ // Nothing to report when no function qualifies or the option is set to 0.
+ if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0)
+ return Error::success();
+
+ printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck);
+ return Error::success();
+}
diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp
index 8a8fa0639ec2..38e080c9dd62 100644
--- a/bolt/lib/Passes/IdenticalCodeFolding.cpp
+++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp
@@ -44,18 +44,6 @@ TimeICF("time-icf",
cl::cat(BoltOptCategory));
} // namespace opts
-bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const {
- if (BF.hasUnknownControlFlow())
- return false;
- if (BF.isFolded())
- return false;
- if (BF.hasSDTMarker())
- return false;
- if (BF.isPseudo())
- return false;
- return BinaryFunctionPass::shouldOptimize(BF);
-}
-
/// Compare two jump tables in 2 functions. The function relies on consistent
/// ordering of basic blocks in both binary functions (e.g. DFS).
static bool equalJumpTables(const JumpTable &JumpTableA,
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 932d4fd7508f..c483f70a836e 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -11,8 +11,6 @@
//===----------------------------------------------------------------------===//
#include "bolt/Passes/LongJmp.h"
-#include "bolt/Core/ParallelUtilities.h"
-#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "longjmp"
@@ -25,11 +23,6 @@ extern cl::opt<unsigned> AlignFunctions;
extern cl::opt<bool> UseOldText;
extern cl::opt<bool> HotFunctionsAtEnd;
-static cl::opt<bool>
- ExperimentalRelaxation("relax-exp",
- cl::desc("run experimental relaxation pass"),
- cl::init(false), cl::cat(BoltOptCategory));
-
static cl::opt<bool> GroupStubs("group-stubs",
cl::desc("share stubs across functions"),
cl::init(true), cl::cat(BoltOptCategory));
@@ -68,10 +61,10 @@ static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
if (Next != E && (*Next)->isCold())
return *I;
}
- llvm_unreachable("No hot-cold split point found");
+ llvm_unreachable("No hot-colt split point found");
}
-static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
+static bool shouldInsertStub(const BinaryContext &BC, const MCInst &Inst) {
return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
!BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
}
@@ -572,7 +565,7 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
if (BC.MIB->isPseudo(Inst))
continue;
- if (!mayNeedStub(BC, Inst)) {
+ if (!shouldInsertStub(BC, Inst)) {
DotAddress += InsnSize;
continue;
}
@@ -636,434 +629,7 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
return Error::success();
}
-// Relax internal branches with the assumption that they are not separated by
-// more than 128MB after the function is split into fragments.
-void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
- BinaryContext &BC = BF.getBinaryContext();
- auto &MIB = BC.MIB;
-
- if (!BF.isSimple())
- return;
-
- // Quick path.
- if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
- return;
-
- auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
- const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
- return isIntN(Bits, Offset);
- };
-
- auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
- const BinaryBasicBlock &BB) {
- const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
- return isBranchOffsetInRange(Inst, Offset);
- };
-
- // Keep track of all function trampolines that are going to be added to the
- // function layout at the end of relaxation.
- std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
- FunctionTrampolines;
-
- // Fragments are relaxed independently.
- for (FunctionFragment &FF : BF.getLayout().fragments()) {
- // Fill out code size estimation for the fragment. Use output BB address
- // range for offsets from the start of the function.
- uint64_t CodeSize = 0;
- for (BinaryBasicBlock *BB : FF) {
- BB->setOutputStartAddress(CodeSize);
- CodeSize += BB->estimateSize();
- BB->setOutputEndAddress(CodeSize);
- }
-
- // Dynamically-updated size of the fragment.
- uint64_t FragmentSize = CodeSize;
-
- // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
- DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;
-
- // Create a trampoline code after \p BB or at the end of the fragment if BB
- // is nullptr.
- auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
- BinaryBasicBlock *TargetBB, uint64_t Count,
- bool UpdateOffsets = true) {
- std::unique_ptr<BinaryBasicBlock> TrampolineBB = BF.createBasicBlock();
- MCInst Inst;
- {
- auto L = BC.scopeLock();
- MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
- }
- TrampolineBB->addInstruction(Inst);
- TrampolineBB->addSuccessor(TargetBB, Count);
- TrampolineBB->setExecutionCount(Count);
- const uint64_t TrampolineAddress =
- BB ? BB->getOutputEndAddress() : FragmentSize;
- TrampolineBB->setOutputStartAddress(TrampolineAddress);
- TrampolineBB->setOutputEndAddress(TrampolineAddress + InstSize);
- TrampolineBB->setFragmentNum(FF.getFragmentNum());
-
- if (UpdateOffsets) {
- FragmentSize += InstSize;
- for (BinaryBasicBlock *TBB : FF) {
- if (TBB->getOutputStartAddress() >= TrampolineAddress) {
- TBB->setOutputStartAddress(TBB->getOutputStartAddress() + InstSize);
- TBB->setOutputEndAddress(TBB->getOutputEndAddress() + InstSize);
- }
- }
- for (auto &Pair : FunctionTrampolines) {
- BinaryBasicBlock *TBB = Pair.second.get();
- if (TBB->getFragmentNum() != TrampolineBB->getFragmentNum())
- continue;
- if (TBB == TrampolineBB.get())
- continue;
- if (TBB->getOutputStartAddress() >= TrampolineAddress) {
- TBB->setOutputStartAddress(TBB->getOutputStartAddress() + InstSize);
- TBB->setOutputEndAddress(TBB->getOutputEndAddress() + InstSize);
- }
- }
- }
-
- if (!FragmentTrampolines.lookup(TargetBB))
- FragmentTrampolines[TargetBB] = TrampolineBB.get();
- FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
- std::move(TrampolineBB));
- return FunctionTrampolines.back().second.get();
- };
-
- // Pre-populate trampolines by splitting unconditional branches from the
- // containing basic block.
- for (BinaryBasicBlock *BB : FF) {
- MCInst *Inst = BB->getLastNonPseudoInstr();
- if (!Inst || !MIB->isUnconditionalBranch(*Inst))
- continue;
-
- const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst);
- BB->eraseInstruction(BB->findInstruction(Inst));
- BB->setOutputEndAddress(BB->getOutputEndAddress() - InstSize);
-
- BinaryBasicBlock::BinaryBranchInfo BI;
- BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI);
-
- BinaryBasicBlock *TrampolineBB =
- addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false);
- BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count);
- }
-
- /// Relax the branch \p Inst. Return true if basic block offsets need an
- /// update after the trampoline insertion.
- auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst,
- uint64_t InstAddress, BinaryBasicBlock *TargetBB) {
- BinaryFunction *BF = BB->getParent();
-
- // Branch taken count for optimal relaxation.
- const uint64_t Count = BB->getBranchInfo(*TargetBB).Count;
- assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
- "Expected valid branch execution count");
-
- // Try to reuse an existing trampoline without introducing any new code.
- BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB);
- if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) {
- BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
- TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() +
- Count);
- auto L = BC.scopeLock();
- MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
- return;
- }
-
- // For cold branches, check if we can introduce a trampoline at the end
- // of the fragment that is within the branch reach. Note that such
- // trampoline may become unreachable and may need further relaxation.
- const int64_t OffsetToEnd = FragmentSize - InstAddress;
- if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) {
- TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count);
- BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
- auto L = BC.scopeLock();
- MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
-
- return;
- }
-
- // Insert a new block after the current one and use it as a trampoline.
- // If the other successor is a fallthrough invert the condition code.
- TrampolineBB = addTrampolineAfter(BB, TargetBB, Count);
-
- // Check if there's a fallthrough block.
- const BinaryBasicBlock *const NextBB =
- BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
- if (BB->getConditionalSuccessor(false) == NextBB) {
- BB->swapConditionalSuccessors();
- auto L = BC.scopeLock();
- MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
- } else {
- auto L = BC.scopeLock();
- MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
- }
- BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
- };
-
- bool MayNeedRelaxation;
- uint64_t NumIterations = 0;
- do {
- MayNeedRelaxation = false;
- ++NumIterations;
- for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) {
- BinaryBasicBlock *BB = *BBI;
- uint64_t NextInstOffset = BB->getOutputStartAddress();
- for (MCInst &Inst : *BB) {
- const size_t InstAddress = NextInstOffset;
- if (!MIB->isPseudo(Inst))
- NextInstOffset += 4;
-
- if (!mayNeedStub(BF.getBinaryContext(), Inst))
- continue;
-
- const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst);
-
- // Span of +/-128MB.
- if (BitsAvailable == LongestJumpBits)
- continue;
-
- const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
- BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
- assert(TargetBB &&
- "Basic block target expected for conditional branch.");
-
- // Check if the relaxation is needed.
- if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
- isBlockInRange(Inst, InstAddress, *TargetBB))
- continue;
-
- relaxBranch(BB, Inst, InstAddress, TargetBB);
-
- MayNeedRelaxation = true;
- }
- }
-
- // We may have added new instructions, but the whole fragment is less than
- // the minimum branch span.
- if (FragmentSize < ShortestJumpSpan)
- MayNeedRelaxation = false;
-
- } while (MayNeedRelaxation);
-
- LLVM_DEBUG({
- if (NumIterations > 2) {
- dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
- << " in " << NumIterations << " iterations in " << BF << '\n';
- }
- });
- }
-
- // Add trampoline blocks from all fragments to the layout.
- DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
- Insertions;
- for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
- FunctionTrampolines) {
- if (!Pair.second)
- continue;
- Insertions[Pair.first].emplace_back(std::move(Pair.second));
- }
-
- for (auto &Pair : Insertions) {
- BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
- /*UpdateLayout*/ true, /*UpdateCFI*/ true,
- /*RecomputeLPs*/ false);
- }
-}
-
-void LongJmpPass::relaxCalls(BinaryContext &BC) {
- // Map every function to its direct callees. Note that this is different from
- // a typical call graph as we completely ignore indirect calls.
- uint64_t EstimatedSize = 0;
- // Conservatively estimate emitted function size.
- auto estimateFunctionSize = [&](const BinaryFunction &BF) -> uint64_t {
- if (!BC.shouldEmit(BF))
- return 0;
- uint64_t Size = BF.estimateSize();
- if (BF.hasValidIndex())
- Size += BF.getAlignment();
- if (BF.hasIslandsInfo()) {
- Size += BF.estimateConstantIslandSize();
- Size += BF.getConstantIslandAlignment();
- }
-
- return Size;
- };
-
- std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>> CallMap;
- for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
- if (!BC.shouldEmit(BF))
- continue;
-
- EstimatedSize += estimateFunctionSize(BF);
-
- for (const BinaryBasicBlock &BB : BF) {
- for (const MCInst &Inst : BB) {
- if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
- BC.MIB->isIndirectBranch(Inst))
- continue;
- const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
- assert(TargetSymbol);
-
- BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
- if (!Callee) {
- /* Ignore internall calls */
- continue;
- }
-
- CallMap[&BF].insert(Callee);
- }
- }
- }
-
- LLVM_DEBUG(dbgs() << "LongJmp: estimated code size : " << EstimatedSize
- << '\n');
-
- // Build clusters in the order the functions will appear in the output.
- std::vector<FunctionCluster> Clusters;
- Clusters.emplace_back(FunctionCluster());
-
- for (BinaryFunction *BF : BC.getSortedFunctions()) {
- if (!BC.shouldEmit(*BF))
- continue;
-
- const uint64_t BFSize = estimateFunctionSize(*BF);
- if (Clusters.empty() || Clusters.back().Size + BFSize > MaxClusterSize) {
- Clusters.emplace_back(FunctionCluster());
- }
-
- FunctionCluster &FC = Clusters.back();
- FC.Functions.insert(BF);
- auto It = FC.Callees.find(BF);
- if (It != FC.Callees.end()) {
- FC.Callees.erase(It);
- }
- FC.Size += BFSize;
- FC.LastBF = BF;
-
- for (BinaryFunction *Callee : CallMap[BF])
- if (!FC.Functions.count(Callee))
- FC.Callees.insert(Callee);
- }
-
- // Print cluster stats.
- dbgs() << "Built " << Clusters.size() << " clusters\n";
- uint64_t Index = 0;
- for (const FunctionCluster &FC : Clusters) {
- dbgs() << " Cluster: " << Index++ << '\n';
- dbgs() << " " << FC.Functions.size() << " functions\n";
- dbgs() << " " << FC.Callees.size() << " callees\n";
- dbgs() << " " << FC.Size << " bytes\n";
- }
-
- if (Clusters.size() > 2) {
- BC.errs() << "Large code model is unsupported\n";
- exit(1);
- }
-
- if (Clusters.size() == 1)
- return;
-
- // Populate one of the clusters with PLT functions based on the proximity of
- // the PLT section to avoid unneeded thunk redirection.
- // FIXME: this part is extremely fragile as it depends on the placement
- // of PLT section and its proximity to old or new .text.
- // FIXME: a slightly better approach will be to always use thunks for PLT and
- // eliminate redirection later using final addresses in address maps.
- const size_t PLTClusterNum = opts::UseOldText ? 1 : 0;
- for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
- if (BF.isPLTFunction()) {
- auto &PLTCluster = Clusters[PLTClusterNum];
- PLTCluster.Functions.insert(&BF);
- auto It = PLTCluster.Callees.find(&BF);
- if (It != PLTCluster.Callees.end())
- PLTCluster.Callees.erase(It);
- }
- }
-
- // FIXME: section name to use for thunks.
- std::string SectionName =
- Clusters[0].LastBF->getCodeSectionName().str().str();
-
- // Build thunk functions.
- auto createSmallThunk = [&](BinaryFunction &Callee) {
- BinaryFunction *ThunkBF =
- BC.createThunkBinaryFunction("__BThunk__" + Callee.getOneName().str());
- MCInst Inst;
- BC.MIB->createTailCall(Inst, Callee.getSymbol(), BC.Ctx.get());
- ThunkBF->addBasicBlock()->addInstruction(Inst);
- ThunkBF->setCodeSectionName(SectionName);
-
- return ThunkBF;
- };
-
- DenseMap<BinaryFunction *, BinaryFunction *> Thunks;
- for (const FunctionCluster &FC : Clusters) {
- for (BinaryFunction *Callee : FC.Callees) {
- Thunks[Callee] = createSmallThunk(*Callee);
- }
- }
-
- BC.outs() << "BOLT-INFO: " << Thunks.size() << " thunks created\n";
-
- // Replace callees with thunks.
- for (FunctionCluster &FC : Clusters) {
- for (BinaryFunction *BF : FC.Functions) {
- if (!CallMap.count(BF))
- continue;
-
- for (BinaryBasicBlock &BB : *BF) {
- for (MCInst &Inst : BB) {
- if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
- BC.MIB->isIndirectBranch(Inst))
- continue;
- const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
- assert(TargetSymbol);
-
- BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
- if (!Callee) {
- /* Ignore internal calls */
- continue;
- }
-
- // Check if the callee is in the same cluster.
- if (!FC.Callees.count(Callee))
- continue;
-
- // Use thunk as the call destination.
- BC.MIB->replaceBranchTarget(Inst, Thunks[Callee]->getSymbol(),
- BC.Ctx.get());
- }
- }
- }
- }
-
- BC.setThunkLocation(Clusters[0].LastBF);
-}
-
Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
- // TODO: set correct code model based on the total size of split-code.
- if (opts::ExperimentalRelaxation) {
- BC.outs() << "BOLT-INFO: starting experimental relaxation pass\n";
- ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
- relaxLocalBranches(BF);
- };
-
- ParallelUtilities::PredicateTy SkipPredicate =
- [&](const BinaryFunction &BF) {
- return !BC.shouldEmit(BF) || !BF.isSimple();
- };
-
- ParallelUtilities::runOnEachFunction(
- BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
- SkipPredicate, "ExpLongJump");
-
- relaxCalls(BC);
-
- return Error::success();
- }
-
BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
bool Modified;
diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp
index 1530d1076bb0..981d1b70af90 100644
--- a/bolt/lib/Passes/PatchEntries.cpp
+++ b/bolt/lib/Passes/PatchEntries.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "bolt/Passes/PatchEntries.h"
-#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/NameResolver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
@@ -36,20 +35,16 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
if (!opts::ForcePatch) {
// Mark the binary for patching if we did not create external references
// for original code in any of functions we are not going to emit.
- bool NeedsPatching =
- llvm::any_of(llvm::make_second_range(BC.getBinaryFunctions()),
- [&](BinaryFunction &BF) {
- return !BF.isPseudo() && !BC.shouldEmit(BF) &&
- !BF.hasExternalRefRelocations();
- });
+ bool NeedsPatching = llvm::any_of(
+ llvm::make_second_range(BC.getBinaryFunctions()),
+ [&](BinaryFunction &BF) {
+ return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
+ });
if (!NeedsPatching)
return Error::success();
}
- assert(!opts::UseOldText &&
- "Cannot patch entries while overwriting original .text");
-
if (opts::Verbosity >= 1)
BC.outs() << "BOLT-INFO: patching entries in original code\n";
diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp
index 738538ef3c38..87fe625e8c3b 100644
--- a/bolt/lib/Passes/VeneerElimination.cpp
+++ b/bolt/lib/Passes/VeneerElimination.cpp
@@ -33,40 +33,26 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
if (!opts::EliminateVeneers || !BC.isAArch64())
return Error::success();
+ std::map<uint64_t, BinaryFunction> &BFs = BC.getBinaryFunctions();
std::unordered_map<const MCSymbol *, const MCSymbol *> VeneerDestinations;
uint64_t VeneersCount = 0;
- uint64_t NumAllVeneers = 0;
- for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
- if (!BF.isAArch64Veneer())
+ for (auto &It : BFs) {
+ BinaryFunction &VeneerFunction = It.second;
+ if (!VeneerFunction.isAArch64Veneer())
continue;
- ++NumAllVeneers;
-
- if (BF.isIgnored())
- continue;
-
- MCInst &FirstInstruction = *(BF.begin()->begin());
- const MCSymbol *VeneerTargetSymbol;
- if (BC.MIB->isTailCall(FirstInstruction)) {
- VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction);
- } else {
- if (!BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer"))
- continue;
- VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1);
- }
-
- if (!VeneerTargetSymbol)
- continue;
-
- for (const MCSymbol *Symbol : BF.getSymbols())
- VeneerDestinations[Symbol] = VeneerTargetSymbol;
-
VeneersCount++;
- BF.setPseudo(true);
+ VeneerFunction.setPseudo(true);
+ MCInst &FirstInstruction = *(VeneerFunction.begin()->begin());
+ const MCSymbol *VeneerTargetSymbol =
+ BC.MIB->getTargetSymbol(FirstInstruction, 1);
+ assert(VeneerTargetSymbol && "Expecting target symbol for instruction");
+ for (const MCSymbol *Symbol : VeneerFunction.getSymbols())
+ VeneerDestinations[Symbol] = VeneerTargetSymbol;
}
BC.outs() << "BOLT-INFO: number of removed linker-inserted veneers: "
- << VeneersCount << ". Total veneers: " << NumAllVeneers << '\n';
+ << VeneersCount << "\n";
// Handle veneers to veneers in case they occur
for (auto &Entry : VeneerDestinations) {
@@ -79,8 +65,9 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
}
uint64_t VeneerCallers = 0;
- for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
- for (BinaryBasicBlock &BB : BF) {
+ for (auto &It : BFs) {
+ BinaryFunction &Function = It.second;
+ for (BinaryBasicBlock &BB : Function) {
for (MCInst &Instr : BB) {
if (!BC.MIB->isCall(Instr) || BC.MIB->isIndirectCall(Instr))
continue;
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 334252cbd360..ec7e303c0f52 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -9,22 +9,12 @@
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/Timer.h"
#define DEBUG_TYPE "bolt-bat"
-namespace opts {
-extern llvm::cl::OptionCategory BoltCategory;
-llvm::cl::opt<bool>
- TimeBAT("time-bat",
- llvm::cl::desc("print time spent processing BAT tables"),
- llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
-} // namespace opts
-
namespace llvm {
namespace bolt {
@@ -85,9 +75,8 @@ void BoltAddressTranslation::writeEntriesForBB(
}
}
-void BoltAddressTranslation::constructMaps(const BinaryContext &BC) {
- NamedRegionTimer T("constuctmaps", "construct translation maps", "bat",
- "process BAT", opts::TimeBAT);
+void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
const uint64_t InputAddress = Function.getAddress();
@@ -151,11 +140,6 @@ void BoltAddressTranslation::constructMaps(const BinaryContext &BC) {
Maps.emplace(FF.getAddress(), std::move(Map));
}
}
-}
-
-void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
- constructMaps(BC);
// Output addresses are delta-encoded
uint64_t PrevAddress = 0;
@@ -200,8 +184,6 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
template <bool Cold>
void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
uint64_t &PrevAddress, raw_ostream &OS) {
- NamedRegionTimer T("writemaps", "write translation maps", "bat",
- "process BAT", opts::TimeBAT);
const uint32_t NumFuncs =
llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
return Cold == ColdPartSource.count(Address);
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 67ed32017667..fe0fcfdcd42f 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -643,11 +643,7 @@ size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
// equal number of blocks.
if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
continue;
- auto NamespaceToBFsIt = NamespaceToBFs.find(Namespace);
- if (NamespaceToBFsIt == NamespaceToBFs.end())
- NamespaceToBFs[Namespace] = {BF};
- else
- NamespaceToBFsIt->second.push_back(BF);
+ NamespaceToBFs[Namespace].push_back(BF);
}
// Iterates through all profiled functions and binary functions belonging to
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 17e08324024d..b09060418334 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -12,6 +12,7 @@
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
+#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Passes/FixRISCVCallsPass.h"
#include "bolt/Passes/FixRelaxationPass.h"
#include "bolt/Passes/FrameOptimizer.h"
@@ -373,6 +374,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (opts::PrintProfileStats)
Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
+ Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
+
Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
@@ -489,9 +492,6 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
// memory profiling data.
Manager.registerPass(std::make_unique<ReorderData>());
- // Assign each function an output section.
- Manager.registerPass(std::make_unique<AssignSections>());
-
if (BC.isAArch64()) {
Manager.registerPass(std::make_unique<ADRRelaxationPass>());
@@ -515,6 +515,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
+ // Assign each function an output section.
+ Manager.registerPass(std::make_unique<AssignSections>());
+
// Patch original function entries
if (BC.HasRelocations)
Manager.registerPass(std::make_unique<PatchEntries>());
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 849928910996..f9cb1b3895e7 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -42,7 +42,6 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ThreadPool.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
@@ -368,11 +367,6 @@ static cl::opt<bool> AlwaysConvertToRanges(
cl::ReallyHidden, cl::init(false), cl::cat(BoltCategory));
extern cl::opt<std::string> CompDirOverride;
-
-cl::opt<bool>
- TimeDebug("time-debug",
- cl::desc("print time spent processing debug information"),
- cl::Hidden, cl::cat(BoltCategory));
} // namespace opts
/// If DW_AT_low_pc exists sets LowPC and returns true.
@@ -553,8 +547,6 @@ using CUPartitionVector = std::vector<DWARFUnitVec>;
/// cu-processing-batch-size. All the CUs that have cross CU reference reference
/// as a source are put in to the same initial bucket.
static CUPartitionVector partitionCUs(DWARFContext &DwCtx) {
- NamedRegionTimer T("partitioncus", "partition cus", "debug",
- "update debug info", opts::TimeDebug);
CUPartitionVector Vec(2);
unsigned Counter = 0;
const DWARFDebugAbbrev *Abbr = DwCtx.getDebugAbbrev();
@@ -615,9 +607,11 @@ void DWARFRewriter::updateDebugInfo() {
}
uint32_t CUIndex = 0;
+ std::mutex AccessMutex;
// Needs to be invoked in the same order as CUs are processed.
llvm::DenseMap<uint64_t, uint64_t> LocListWritersIndexByCU;
auto createRangeLocListAddressWriters = [&](DWARFUnit &CU) {
+ std::lock_guard<std::mutex> Lock(AccessMutex);
const uint16_t DwarfVersion = CU.getVersion();
if (DwarfVersion >= 5) {
auto AddrW = std::make_unique<DebugAddrWriterDwarf5>(
@@ -683,8 +677,6 @@ void DWARFRewriter::updateDebugInfo() {
GDBIndexSection, TempRangesSectionWriter);
};
auto processMainBinaryCU = [&](DWARFUnit &Unit, DIEBuilder &DIEBlder) {
- NamedRegionTimer T("processmainbinarycu", "process main binary CU",
- "debug", "update debug info", opts::TimeDebug);
std::optional<DWARFUnit *> SplitCU;
std::optional<uint64_t> RangesBase;
std::optional<uint64_t> DWOId = Unit.getDWOId();
@@ -717,11 +709,7 @@ void DWARFRewriter::updateDebugInfo() {
};
DIEBuilder DIEBlder(BC, BC.DwCtx.get(), DebugNamesTable);
- {
- NamedRegionTimer T("buildtypeunits", "build type units", "debug",
- "update debug info", opts::TimeDebug);
- DIEBlder.buildTypeUnits(StrOffstsWriter.get());
- }
+ DIEBlder.buildTypeUnits(StrOffstsWriter.get());
SmallVector<char, 20> OutBuffer;
std::unique_ptr<raw_svector_ostream> ObjOS =
std::make_unique<raw_svector_ostream>(OutBuffer);
@@ -735,9 +723,8 @@ void DWARFRewriter::updateDebugInfo() {
CUPartitionVector PartVec = partitionCUs(*BC.DwCtx);
const unsigned int ThreadCount =
std::min(opts::DebugThreadCount, opts::ThreadCount);
- auto updateSplitCUs = [&]() {
- NamedRegionTimer T("updatesplitcus", "update split CUs", "debug",
- "update debug info", opts::TimeDebug);
+ for (std::vector<DWARFUnit *> &Vec : PartVec) {
+ DIEBlder.buildCompileUnits(Vec);
llvm::SmallVector<std::unique_ptr<DIEBuilder>, 72> DWODIEBuildersByCU;
ThreadPoolInterface &ThreadPool =
ParallelUtilities::getThreadPool(ThreadCount);
@@ -777,13 +764,6 @@ void DWARFRewriter::updateDebugInfo() {
});
}
ThreadPool.wait();
- return DWODIEBuildersByCU;
- };
-
- for (std::vector<DWARFUnit *> &Vec : PartVec) {
- DIEBlder.buildCompileUnits(Vec);
- llvm::SmallVector<std::unique_ptr<DIEBuilder>, 72> DWODIEBuildersByCU =
- updateSplitCUs();
for (std::unique_ptr<DIEBuilder> &DWODIEBuilderPtr : DWODIEBuildersByCU)
DWODIEBuilderPtr->updateDebugNamesTable();
for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
@@ -1471,8 +1451,6 @@ void DWARFRewriter::updateLineTableOffsets(const MCAssembler &Asm) {
CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
DIEStreamer &Streamer,
GDBIndex &GDBIndexSection) {
- NamedRegionTimer T("finalizetypesections", "finalize type sections",
- "debug", "update debug info", opts::TimeDebug);
// update TypeUnit DW_AT_stmt_list with new .debug_line information.
auto updateLineTable = [&](const DWARFUnit &Unit) -> void {
DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit);
@@ -1538,8 +1516,6 @@ void DWARFRewriter::finalizeDebugSections(
DIEBuilder &DIEBlder, DWARF5AcceleratorTable &DebugNamesTable,
DIEStreamer &Streamer, raw_svector_ostream &ObjOS, CUOffsetMap &CUMap,
DebugAddrWriter &FinalAddrWriter) {
- NamedRegionTimer T("finalizedebugsections", "finalize debug sections",
- "debug", "update debug info", opts::TimeDebug);
if (StrWriter->isInitialized()) {
RewriteInstance::addToDebugSectionsToOverwrite(".debug_str");
std::unique_ptr<DebugStrBufferVector> DebugStrSectionContents =
@@ -1652,8 +1628,6 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder,
CUOffsetMap &CUMap,
const std::list<DWARFUnit *> &CUs,
DebugAddrWriter &FinalAddrWriter) {
- NamedRegionTimer T("finalizecompileunits", "finalize compile units",
- "debug", "update debug info", opts::TimeDebug);
for (DWARFUnit *CU : CUs) {
auto AddressWriterIterator = AddressWritersByCU.find(CU->getOffset());
assert(AddressWriterIterator != AddressWritersByCU.end() &&
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 8532ec85ebbf..8647df4b0edf 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/Timer.h"
#include <memory>
#undef DEBUG_TYPE
@@ -31,10 +30,6 @@ using namespace bolt;
namespace opts {
-cl::opt<bool> TimeProbes("time-probes",
- cl::desc("print time spent processing pseudo probes"),
- cl::Hidden, cl::cat(BoltCategory));
-
enum PrintPseudoProbesOptions {
PPP_None = 0,
PPP_Probes_Section_Decode = 0x1,
@@ -108,9 +103,6 @@ Error PseudoProbeRewriter::postEmitFinalizer() {
parsePseudoProbe();
updatePseudoProbes();
- // encode pseudo probes with updated addresses
- encodePseudoProbes();
-
return Error::success();
}
@@ -124,8 +116,6 @@ void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) {
return;
}
- NamedRegionTimer T("parseprobes", "parse pseudo probes", "probes",
- "process pseudo probes", opts::TimeProbes);
// If only one section is found, it might mean the ELF is corrupted.
if (!PseudoProbeDescSection) {
errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n";
@@ -206,8 +196,6 @@ void PseudoProbeRewriter::updatePseudoProbes() {
// check if there is pseudo probe section decoded
if (ProbeDecoder.getAddress2ProbesMap().empty())
return;
- NamedRegionTimer T("updateprobes", "update pseudo probes", "probes",
- "process pseudo probes", opts::TimeProbes);
// input address converted to output
AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
@@ -284,15 +272,13 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}
outs() << "=======================================\n";
}
+
+ // encode pseudo probes with updated addresses
+ encodePseudoProbes();
}
void PseudoProbeRewriter::encodePseudoProbes() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
- // check if there is pseudo probe section decoded
- if (ProbeDecoder.getAddress2ProbesMap().empty())
- return;
- NamedRegionTimer T("encodeprobes", "encode pseudo probes", "probes",
- "process pseudo probes", opts::TimeProbes);
// Buffer for new pseudo probes section
SmallString<8> Contents;
MCDecodedPseudoProbe *LastProbe = nullptr;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ba27aa817e1e..32ec7abe8b66 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3557,11 +3557,9 @@ void RewriteInstance::finalizeMetadataPreEmit() {
}
void RewriteInstance::updateMetadata() {
- {
- NamedRegionTimer T("updatemetadata-postemit", "update metadata post-emit",
- TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
- MetadataManager.runFinalizersAfterEmit();
- }
+ NamedRegionTimer T("updatemetadata-postemit", "update metadata post-emit",
+ TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
+ MetadataManager.runFinalizersAfterEmit();
if (opts::UpdateDebugSections) {
NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
@@ -3723,41 +3721,15 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
return Address;
};
- // Try to allocate sections before the \p Address and return an address for
- // the allocation of the first section or 0 if \p is not big enough.
- auto allocateBefore = [&](uint64_t Address) -> uint64_t {
- for (auto SI = CodeSections.rbegin(), SE = CodeSections.rend(); SI != SE;
- ++SI) {
- BinarySection *Section = *SI;
- if (Section->getOutputSize() > Address)
- return 0;
- Address -= Section->getOutputSize();
- Address = alignDown(Address, Section->getAlignment());
- Section->setOutputAddress(Address);
- }
- return Address;
- };
-
// Check if we can fit code in the original .text
bool AllocationDone = false;
if (opts::UseOldText) {
- uint64_t StartAddress;
- uint64_t EndAddress;
- if (opts::HotFunctionsAtEnd) {
- EndAddress = BC->OldTextSectionAddress + BC->OldTextSectionSize;
- StartAddress = allocateBefore(EndAddress);
- } else {
- StartAddress = BC->OldTextSectionAddress;
- EndAddress = allocateAt(BC->OldTextSectionAddress);
- }
+ const uint64_t CodeSize =
+ allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
- const uint64_t CodeSize = EndAddress - StartAddress;
if (CodeSize <= BC->OldTextSectionSize) {
BC->outs() << "BOLT-INFO: using original .text for new code with 0x"
- << Twine::utohexstr(opts::AlignText) << " alignment";
- if (StartAddress != BC->OldTextSectionAddress)
- BC->outs() << " at 0x" << Twine::utohexstr(StartAddress);
- BC->outs() << '\n';
+ << Twine::utohexstr(opts::AlignText) << " alignment\n";
AllocationDone = true;
} else {
BC->errs()
@@ -5563,8 +5535,6 @@ uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
}
void RewriteInstance::rewriteFile() {
- NamedRegionTimer T("rewrite", "rewrite file", TimerGroupName,
- TimerGroupDesc, opts::TimeRewrite);
std::error_code EC;
Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
sys::fs::OF_None);