summaryrefslogtreecommitdiff
path: root/bolt/runtime/instr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'bolt/runtime/instr.cpp')
-rw-r--r--bolt/runtime/instr.cpp115
1 files changed, 69 insertions, 46 deletions
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index ae356e71cbe4..a42750cef6b6 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -568,13 +568,13 @@ struct FunctionDescription {
/// should be straightforward as most data is POD or an array of POD elements.
/// This metadata is used to reconstruct function CFGs.
struct ProfileWriterContext {
- IndCallDescription *IndCallDescriptions;
- IndCallTargetDescription *IndCallTargets;
- uint8_t *FuncDescriptions;
- char *Strings; // String table with function names used in this binary
+ const IndCallDescription *IndCallDescriptions;
+ const IndCallTargetDescription *IndCallTargets;
+ const uint8_t *FuncDescriptions;
+ const char *Strings; // String table with function names used in this binary
int FileDesc; // File descriptor for the file on disk backing this
// information in memory via mmap
- void *MMapPtr; // The mmap ptr
+ const void *MMapPtr; // The mmap ptr
int MMapSize; // The mmap size
/// Hash table storing all possible call destinations to detect untracked
@@ -672,14 +672,15 @@ bool parseAddressRange(const char *Str, uint64_t &StartAddress,
return true;
}
+static constexpr uint32_t NameMax = 4096;
+static char TargetPath[NameMax] = {};
+
/// Get full path to the real binary by getting current virtual address
/// and searching for the appropriate link in address range in
/// /proc/self/map_files
static char *getBinaryPath() {
const uint32_t BufSize = 1024;
- const uint32_t NameMax = 4096;
const char DirPath[] = "/proc/self/map_files/";
- static char TargetPath[NameMax] = {};
char Buf[BufSize];
if (__bolt_instr_binpath[0] != '\0')
@@ -719,36 +720,46 @@ static char *getBinaryPath() {
return nullptr;
}
-ProfileWriterContext readDescriptions() {
+ProfileWriterContext readDescriptions(const uint8_t *BinContents,
+ uint64_t Size) {
ProfileWriterContext Result;
- char *BinPath = getBinaryPath();
- assert(BinPath && BinPath[0] != '\0', "failed to find binary path");
- uint64_t FD = __open(BinPath, O_RDONLY,
- /*mode=*/0666);
- assert(static_cast<int64_t>(FD) >= 0, "failed to open binary path");
+ assert((BinContents == nullptr) == (Size == 0),
+ "either empty or valid library content buffer");
+
+ if (BinContents) {
+ Result.FileDesc = -1;
+ } else {
+ const char *BinPath = getBinaryPath();
+ assert(BinPath && BinPath[0] != '\0', "failed to find binary path");
+
+ uint64_t FD = __open(BinPath, O_RDONLY,
+ /*mode=*/0666);
+ assert(static_cast<int64_t>(FD) >= 0, "failed to open binary path");
- Result.FileDesc = FD;
+ Result.FileDesc = FD;
- // mmap our binary to memory
- uint64_t Size = __lseek(FD, 0, SEEK_END);
- uint8_t *BinContents = reinterpret_cast<uint8_t *>(
- __mmap(0, Size, PROT_READ, MAP_PRIVATE, FD, 0));
- assert(BinContents != MAP_FAILED, "readDescriptions: Failed to mmap self!");
+ // mmap our binary to memory
+ Size = __lseek(FD, 0, SEEK_END);
+ BinContents = reinterpret_cast<uint8_t *>(
+ __mmap(0, Size, PROT_READ, MAP_PRIVATE, FD, 0));
+ assert(BinContents != MAP_FAILED, "readDescriptions: Failed to mmap self!");
+ }
Result.MMapPtr = BinContents;
Result.MMapSize = Size;
- Elf64_Ehdr *Hdr = reinterpret_cast<Elf64_Ehdr *>(BinContents);
- Elf64_Shdr *Shdr = reinterpret_cast<Elf64_Shdr *>(BinContents + Hdr->e_shoff);
- Elf64_Shdr *StringTblHeader = reinterpret_cast<Elf64_Shdr *>(
+ const Elf64_Ehdr *Hdr = reinterpret_cast<const Elf64_Ehdr *>(BinContents);
+ const Elf64_Shdr *Shdr =
+ reinterpret_cast<const Elf64_Shdr *>(BinContents + Hdr->e_shoff);
+ const Elf64_Shdr *StringTblHeader = reinterpret_cast<const Elf64_Shdr *>(
BinContents + Hdr->e_shoff + Hdr->e_shstrndx * Hdr->e_shentsize);
// Find .bolt.instr.tables with the data we need and set pointers to it
for (int I = 0; I < Hdr->e_shnum; ++I) {
- char *SecName = reinterpret_cast<char *>(
+ const char *SecName = reinterpret_cast<const char *>(
BinContents + StringTblHeader->sh_offset + Shdr->sh_name);
if (compareStr(SecName, ".bolt.instr.tables", 64) != 0) {
- Shdr = reinterpret_cast<Elf64_Shdr *>(BinContents + Hdr->e_shoff +
- (I + 1) * Hdr->e_shentsize);
+ Shdr = reinterpret_cast<const Elf64_Shdr *>(BinContents + Hdr->e_shoff +
+ (I + 1) * Hdr->e_shentsize);
continue;
}
// Actual contents of the ELF note start after offset 20 decimal:
@@ -758,19 +769,19 @@ ProfileWriterContext readDescriptions() {
// Offset 12: Producer name (BOLT\0) (5 bytes + align to 4-byte boundary)
// Offset 20: Contents
uint32_t IndCallDescSize =
- *reinterpret_cast<uint32_t *>(BinContents + Shdr->sh_offset + 20);
- uint32_t IndCallTargetDescSize = *reinterpret_cast<uint32_t *>(
+ *reinterpret_cast<const uint32_t *>(BinContents + Shdr->sh_offset + 20);
+ uint32_t IndCallTargetDescSize = *reinterpret_cast<const uint32_t *>(
BinContents + Shdr->sh_offset + 24 + IndCallDescSize);
- uint32_t FuncDescSize =
- *reinterpret_cast<uint32_t *>(BinContents + Shdr->sh_offset + 28 +
- IndCallDescSize + IndCallTargetDescSize);
- Result.IndCallDescriptions = reinterpret_cast<IndCallDescription *>(
+ uint32_t FuncDescSize = *reinterpret_cast<const uint32_t *>(
+ BinContents + Shdr->sh_offset + 28 + IndCallDescSize +
+ IndCallTargetDescSize);
+ Result.IndCallDescriptions = reinterpret_cast<const IndCallDescription *>(
BinContents + Shdr->sh_offset + 24);
- Result.IndCallTargets = reinterpret_cast<IndCallTargetDescription *>(
+ Result.IndCallTargets = reinterpret_cast<const IndCallTargetDescription *>(
BinContents + Shdr->sh_offset + 28 + IndCallDescSize);
Result.FuncDescriptions = BinContents + Shdr->sh_offset + 32 +
IndCallDescSize + IndCallTargetDescSize;
- Result.Strings = reinterpret_cast<char *>(
+ Result.Strings = reinterpret_cast<const char *>(
BinContents + Shdr->sh_offset + 32 + IndCallDescSize +
IndCallTargetDescSize + FuncDescSize);
return Result;
@@ -814,13 +825,14 @@ void printStats(const ProfileWriterContext &Ctx) {
strCopy(StatPtr,
"\nBOLT INSTRUMENTATION RUNTIME STATISTICS\n\nIndCallDescSize: ");
StatPtr = intToStr(StatPtr,
- Ctx.FuncDescriptions -
- reinterpret_cast<uint8_t *>(Ctx.IndCallDescriptions),
+ Ctx.FuncDescriptions - reinterpret_cast<const uint8_t *>(
+ Ctx.IndCallDescriptions),
10);
StatPtr = strCopy(StatPtr, "\nFuncDescSize: ");
- StatPtr = intToStr(
- StatPtr,
- reinterpret_cast<uint8_t *>(Ctx.Strings) - Ctx.FuncDescriptions, 10);
+ StatPtr = intToStr(StatPtr,
+ reinterpret_cast<const uint8_t *>(Ctx.Strings) -
+ Ctx.FuncDescriptions,
+ 10);
StatPtr = strCopy(StatPtr, "\n__bolt_instr_num_ind_calls: ");
StatPtr = intToStr(StatPtr, __bolt_instr_num_ind_calls, 10);
StatPtr = strCopy(StatPtr, "\n__bolt_instr_num_funcs: ");
@@ -1507,7 +1519,7 @@ extern "C" void __bolt_instr_clear_counters() {
}
/// This is the entry point for profile writing.
-/// There are three ways of getting here:
+/// There are four ways of getting here:
///
/// * Program execution ended, finalization methods are running and BOLT
/// hooked into FINI from your binary dynamic section;
@@ -1516,9 +1528,18 @@ extern "C" void __bolt_instr_clear_counters() {
/// * BOLT prints this function address so you can attach a debugger and
/// call this function directly to get your profile written to disk
/// on demand.
+/// * Application can, at interesting runtime point, iterate through all
+/// the loaded native libraries and for each call dlopen() and dlsym()
+/// to get a pointer to this function and call through the acquired
+/// function pointer to dump profile data.
///
extern "C" void __attribute((force_align_arg_pointer))
-__bolt_instr_data_dump(int FD) {
+__bolt_instr_data_dump(int FD, const char *LibPath = nullptr,
+ const uint8_t *LibContents = nullptr,
+ uint64_t LibSize = 0) {
+ if (LibPath)
+ strCopy(TargetPath, LibPath, NameMax);
+
// Already dumping
if (!GlobalWriteProfileMutex->acquire())
return;
@@ -1529,7 +1550,7 @@ __bolt_instr_data_dump(int FD) {
assert(ret == 0, "Failed to ftruncate!");
BumpPtrAllocator HashAlloc;
HashAlloc.setMaxSize(0x6400000);
- ProfileWriterContext Ctx = readDescriptions();
+ ProfileWriterContext Ctx = readDescriptions(LibContents, LibSize);
Ctx.CallFlowTable = new (HashAlloc, 0) CallFlowHashTable(HashAlloc);
DEBUG(printStats(Ctx));
@@ -1549,8 +1570,10 @@ __bolt_instr_data_dump(int FD) {
Ctx.CallFlowTable->forEachElement(visitCallFlowEntry, FD, &Ctx);
__fsync(FD);
- __munmap(Ctx.MMapPtr, Ctx.MMapSize);
- __close(Ctx.FileDesc);
+ if (Ctx.FileDesc != -1) {
+ __munmap((void *)Ctx.MMapPtr, Ctx.MMapSize);
+ __close(Ctx.FileDesc);
+ }
HashAlloc.destroy();
GlobalWriteProfileMutex->release();
DEBUG(report("Finished writing profile.\n"));
@@ -1756,7 +1779,7 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
"jal x1, __bolt_instr_setup\n"
RESTORE_ALL
"setup_symbol:\n"
- "auipc x5, %%pcrel_hi(__bolt_start_trampoline)\n"
+ "auipc x5, %%pcrel_hi(__bolt_start_trampoline)\n"
"addi x5, x5, %%pcrel_lo(setup_symbol)\n"
"jr x5\n"
:::);
@@ -1788,8 +1811,8 @@ extern "C" void __bolt_instr_fini() {
__asm__ __volatile__(
SAVE_ALL
"fini_symbol:\n"
- "auipc x5, %%pcrel_hi(__bolt_fini_trampoline)\n"
- "addi x5, x5, %%pcrel_lo(fini_symbol)\n"
+ "auipc x5, %%pcrel_hi(__bolt_fini_trampoline)\n"
+ "addi x5, x5, %%pcrel_lo(fini_symbol)\n"
"jalr x1, 0(x5)\n"
RESTORE_ALL
:::);