1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
//===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "X86MCSymbolizer.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/Relocation.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#define DEBUG_TYPE "bolt-symbolizer"
namespace llvm {
namespace bolt {
/// Out-of-line destructor definition. It performs no work of its own beyond
/// the implicit destruction of members and bases.
X86MCSymbolizer::~X86MCSymbolizer() = default;
/// Try to append a symbolic expression operand to \p Inst in place of the
/// raw operand value that is currently being added to it.
///
/// \param Inst        instruction that receives the expression operand.
/// \param CStream     not used by this implementation.
/// \param Value       raw operand value; it is treated as a target address
///                    when symbolizing without relocation data.
/// \param InstAddress address of the instruction.
/// \param IsBranch    when true, the operand is never symbolized here.
/// \param ImmOffset   offset of the operand from the instruction start.
/// \param ImmSize     size of the operand; zero marks an implicit operand.
/// \param InstSize    size of the instruction.
/// \returns true if an expression operand was appended to \p Inst, false
/// otherwise.
bool X86MCSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
    bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
  // Branch operands and implicit (zero-sized) operands are not handled here.
  if (IsBranch || ImmSize == 0)
    return false;

  BinaryContext &BC = Function.getBinaryContext();
  if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
    return false;

  MCContext *Ctx = BC.Ctx.get();

  // Append "Symbol" or "Symbol + Addend" (when Addend is non-zero) to Inst as
  // an expression operand.
  auto appendSymbolRef = [&](const MCSymbol *Symbol, uint64_t Addend) {
    const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
    if (Addend) {
      const MCExpr *Offset = MCConstantExpr::create(Addend, *Ctx);
      Expr = MCBinaryExpr::createAdd(Expr, Offset, *Ctx);
    }
    Inst.addOperand(MCOperand::createExpr(Expr));
  };

  // Symbolize the operand without relocation data, provided it is the
  // displacement of a memory operand whose base register is the program
  // counter. Yields true if an operand was appended, false otherwise.
  auto symbolizePCRelDisp = [&]() {
    const int MemOpNo = BC.MIB->getMemoryOperandNo(Inst);
    if (MemOpNo == -1)
      return false;

    // The operand about to be added must land in the displacement slot.
    const unsigned DispOpNo = MemOpNo + X86::AddrDisp;
    if (Inst.getNumOperands() != DispOpNo)
      return false;

    const MCOperand &BaseReg = Inst.getOperand(MemOpNo + X86::AddrBaseReg);
    if (BaseReg.getReg() != BC.MRI->getProgramCounter())
      return false;

    const MCOperand &ScaleAmt = Inst.getOperand(MemOpNo + X86::AddrScaleAmt);
    const MCOperand &IndexReg = Inst.getOperand(MemOpNo + X86::AddrIndexReg);
    if (ScaleAmt.getImm() != 0 &&
        IndexReg.getReg() != MCRegister::NoRegister)
      return false;

    const MCSymbol *Sym = nullptr;
    uint64_t Offset = 0;
    if (CreateNewSymbols) {
      std::tie(Sym, Offset) =
          BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
    } else {
      // Only existing data objects may be referenced; give up if none
      // contains the target address.
      BinaryData *BD = BC.getBinaryDataContainingAddress(Value);
      if (!BD)
        return false;
      Sym = BD->getSymbol();
      Offset = Value - BD->getAddress();
    }

    appendSymbolRef(Sym, Offset);
    return true;
  };

  // GOTPCRELX relocations are looked up first. Since they permit the linker to
  // rewrite the instruction, the search covers the offset range of the whole
  // instruction rather than the offset of the operand. When a GOTPCRELX
  // relocation exists for the instruction, no other relocation is expected in
  // that range because GOTPCRELX only applies to certain instruction types.
  const uint64_t InstOffset = InstAddress - Function.getAddress();
  const Relocation *Rel =
      Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
  if (Rel && Relocation::isX86GOTPCRELX(Rel->Type)) {
    // A PC-relative operand is converted without consulting the relocation.
    // Using the absolute address instead of the relocation addend is safe for
    // GOTPCRELX: non-standard forms are rejected by the linker conversion
    // process and the operand always references GOT, which is not rewritten.
    if (symbolizePCRelDisp())
      return true;

    // Otherwise the linker turned the PC-relative address into an absolute
    // one; symbolize that address.
    if (CreateNewSymbols)
      BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);

    if (const BinaryData *Target = BC.getBinaryDataAtAddress(Value)) {
      appendSymbolRef(Target->getSymbol(), /*Addend=*/0);
      return true;
    }

    assert(!CreateNewSymbols &&
           "BinaryData should exist at converted GOTPCRELX destination");
    return false;
  }

  // Look for a relocation placed exactly at the operand.
  const uint64_t OperandOffset = InstOffset + ImmOffset;
  if (!Rel || Rel->Offset != OperandOffset)
    Rel = Function.getRelocationAt(OperandOffset);

  // No relocation: fall back to relocation-free PC-relative symbolization.
  if (!Rel)
    return symbolizePCRelDisp();

  // GOTPC64 needs special treatment: the X86 assembler cannot emit a
  // PC-relative 8-byte fixup, so the reference has to be expressed through the
  // _GLOBAL_OFFSET_TABLE_ symbol name.
  if (Relocation::isX86GOTPC64(Rel->Type)) {
    auto SymAndAddend = handleGOTPC64(*Rel, InstAddress);
    if (auto Err = SymAndAddend.takeError()) {
      // Mark the function as non-simple and report the error.
      Function.setSimple(false);
      BC.logBOLTErrorsAndQuitOnFatal(std::move(Err));
      return false;
    }
    appendSymbolRef(SymAndAddend->first, SymAndAddend->second);
    return true;
  }

  uint64_t SymbolValue = Rel->Value - Rel->Addend;
  if (Rel->isPCRelative())
    SymbolValue += InstAddress + ImmOffset;

  // Register the reference to the symbol's address.
  if (CreateNewSymbols)
    BC.handleAddressRef(SymbolValue, Function, Rel->isPCRelative());

  // For PC-relative targets the effective addend also includes the distance
  // from the relocation location to the next instruction.
  uint64_t Addend = Rel->Addend;
  if (Rel->isPCRelative())
    Addend += InstOffset + InstSize - Rel->Offset;

  appendSymbolRef(Rel->Symbol, Addend);
  return true;
}
/// Compute the symbol/addend pair used to express a GOTPC64 relocation
/// relative to _GLOBAL_OFFSET_TABLE_.
///
/// \param R         the relocation; its Value field is read.
/// \param InstrAddr address of the instruction carrying the relocation.
/// \returns the _GLOBAL_OFFSET_TABLE_ symbol together with the addend, or a
/// non-fatal error when no GOT symbol with a non-zero address is available.
Expected<std::pair<MCSymbol *, uint64_t>>
X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
  BinaryContext &BC = Function.getBinaryContext();

  const BinaryData *GOT = BC.getGOTSymbol();
  const bool HasValidGOT = GOT && GOT->getAddress();
  if (!HasValidGOT) {
    // A serious problem, but not one that should kill the disassembler:
    // report it as a non-fatal error so it gets logged for the user.
    return createNonFatalBOLTError(
        "R_X86_GOTPC64 relocation is present but we did not detect "
        "a valid _GLOBAL_OFFSET_TABLE_ in symbol table\n");
  }

  // R_X86_GOTPC64 is relative neither to the relocation location nor to the
  // end of the instruction, but to the start of the MOVABSQ instruction. The
  // target address is therefore the value encoded in the original operand
  // (R.Value) plus the MOVABSQ address (InstrAddr); subtracting the real GOT
  // address from it yields the intended addend.
  const int64_t Addend = R.Value + InstrAddr - GOT->getAddress();
  MCSymbol *GOTSym = BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_");
  return std::make_pair(GOTSym, Addend);
}
/// No-op: the body is empty, so nothing is written to \p CStream and neither
/// \p Value nor \p Address is inspected.
void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
                                                      int64_t Value,
                                                      uint64_t Address) {
  // Nothing to do.
}
} // namespace bolt
} // namespace llvm
|