summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen/MachineLICM.cpp
diff options
context:
space:
mode:
author Vitaly Buka <vitalybuka@google.com> 2024-06-24 23:42:47 -0700
committer Vitaly Buka <vitalybuka@google.com> 2024-06-24 23:42:47 -0700
commit 748fe87a8e0ccce436e9c94d8d163f9e03c81b48 (patch)
tree 508d1619316f1cda23b7f7844a1c3b19d77face2 /llvm/lib/CodeGen/MachineLICM.cpp
parent 46b525389c09ae37c34648c7f108374b815a6507 (diff)
parent 43d207addaf4111dd6a4e0e702e8797587ce61ba (diff)
[𝘀𝗽𝗿] changes introduced through rebase
users/vitalybuka/spr/main.sanitizer-rename-define_real_pthread_functions
Created using spr 1.3.4 [skip ci]
Diffstat (limited to 'llvm/lib/CodeGen/MachineLICM.cpp')
-rw-r--r-- llvm/lib/CodeGen/MachineLICM.cpp 60
1 files changed, 38 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6c5170e918e0..287bd00aeba8 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -426,38 +426,54 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
BitVector &RUs,
const uint32_t *Mask) {
- // Iterate over the RegMask raw to avoid constructing a BitVector, which is
- // expensive as it implies dynamically allocating memory.
+ // FIXME: This intentionally works in reverse due to some issues with the
+ // Register Units infrastructure.
//
- // We also work backwards.
+ // This is used to apply callee-saved-register masks to the clobbered regunits
+ // mask.
+ //
+ // The right way to approach this is to start with a BitVector full of ones,
+ // then reset all the bits of the regunits of each register that is set in the
+ // mask (registers preserved), then OR the resulting bits with the Clobbers
+ // mask. This correctly prioritizes the saved registers, so if a RU is shared
+ // between a register that is preserved, and one that is NOT preserved, that
+ // RU will not be set in the output vector (the clobbers).
+ //
+ // What we have to do for now is the opposite: we have to assume that the
+ // regunits of all registers that are NOT preserved are clobbered, even if
+ // those regunits are preserved by another register. So if a RU is shared
+ // like described previously, that RU will be set.
+ //
+ // This is to work around an issue which appears in AArch64, but isn't
+ // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
+ // register (lower 64 bits). A few Dn registers are preserved by some calling
+ // conventions, but Qn and Dn share exactly the same reg units.
+ //
+ // If we do this the right way, Qn will be marked as NOT clobbered even though
+ // its upper 64 bits are NOT preserved. The conservative approach handles this
+ // correctly at the cost of some missed optimizations on other targets.
+ //
+ // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
+ // should have an extra RegUnit to model the "unknown" bits not covered by the
+ // subregs.
+ BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
const unsigned NumRegs = TRI.getNumRegs();
const unsigned MaskWords = (NumRegs + 31) / 32;
for (unsigned K = 0; K < MaskWords; ++K) {
- // We want to set the bits that aren't in RegMask, so flip it.
- uint32_t Word = ~Mask[K];
-
- // Iterate all set bits, starting from the right.
- while (Word) {
- const unsigned SetBitIdx = countr_zero(Word);
-
- // The bits are numbered from the LSB in each word.
- const unsigned PhysReg = (K * 32) + SetBitIdx;
-
- // Clear the bit at SetBitIdx. Doing it this way appears to generate less
- // instructions on x86. This works because negating a number will flip all
- // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but
- // faster.
- Word ^= Word & -Word;
-
+ const uint32_t Word = Mask[K];
+ for (unsigned Bit = 0; Bit < 32; ++Bit) {
+ const unsigned PhysReg = (K * 32) + Bit;
if (PhysReg == NumRegs)
- return;
+ break;
- if (PhysReg) {
+ if (PhysReg && !((Word >> Bit) & 1)) {
for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
- RUs.set(*RUI);
+ RUsFromRegsNotInMask.set(*RUI);
}
}
}
+
+ RUs |= RUsFromRegsNotInMask;
}
/// Examine the instruction for potentai LICM candidate. Also