summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp178
1 files changed, 0 insertions, 178 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e06a811ba5a2..8de3f8db84ae 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -226,14 +226,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge an index ldr/st instruction into a base ld/st instruction.
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
- // Finds and collapses loads of a symmetric constant value.
- bool tryFoldSymmetryConstantLoad(MachineBasicBlock::iterator &I,
- unsigned Limit);
- MachineBasicBlock::iterator
- doFoldSymmetryConstantLoad(MachineInstr &MI,
- SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
- int UpperLoadIdx, int Accumulated);
-
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -2451,155 +2443,6 @@ AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
return E;
}
-static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
- auto MatchBaseReg = [&](unsigned Count) {
- for (unsigned I = 0; I < Count; I++) {
- auto OpI = MI.getOperand(I);
- if (OpI.isReg() && OpI.getReg() != BaseReg)
- return false;
- }
- return true;
- };
-
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default:
- return false;
- case AArch64::MOVZXi:
- return MatchBaseReg(1);
- case AArch64::MOVKXi:
- return MatchBaseReg(2);
- case AArch64::ORRXrs:
- MachineOperand &Imm = MI.getOperand(3);
- // The fourth operand of ORR must be 32, which means this is a
- // 32-bit symmetric constant load.
- // e.g. renamable $x8 = ORRXrs $x8, $x8, 32
- if (MatchBaseReg(3) && Imm.isImm() && Imm.getImm() == 32)
- return true;
- }
-
- return false;
-}
-
-MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
- MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
- int UpperLoadIdx, int Accumulated) {
- MachineBasicBlock::iterator I = MI.getIterator();
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- MachineBasicBlock *MBB = MI.getParent();
-
- if (!UpperLoadIdx) {
- // ORR ensures that previous instructions load lower 32-bit constants.
- // Remove ORR only.
- (*MIs.begin())->eraseFromParent();
- } else {
- // We need to remove the MOVs for the upper 32 bits because we know these
- // instructions are part of a symmetric constant.
- int Index = 0;
- for (auto MI = MIs.begin(); Index < UpperLoadIdx; ++MI, Index++) {
- (*MI)->eraseFromParent();
- }
- }
-
- Register BaseReg = getLdStRegOp(MI).getReg();
- const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
- Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
- unsigned DstRegState = getRegState(MI.getOperand(0));
- int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
- BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
- .addReg(DstRegW, DstRegState)
- .addReg(DstRegW, DstRegState)
- .addReg(MO.getReg(), getRegState(MO))
- .addImm(Offset * 2)
- .setMemRefs(MI.memoperands())
- .setMIFlags(MI.getFlags());
- I->eraseFromParent();
- return NextI;
-}
-
-bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
- MachineBasicBlock::iterator &I, unsigned Limit) {
- MachineInstr &MI = *I;
- if (MI.getOpcode() != AArch64::STRXui)
- return false;
-
- MachineBasicBlock::iterator MBBI = I;
- MachineBasicBlock::iterator B = I->getParent()->begin();
- if (MBBI == B)
- return false;
-
- TypeSize Scale(0U, false), Width(0U, false);
- int64_t MinOffset, MaxOffset;
- if (!AArch64InstrInfo::getMemOpInfo(AArch64::STPWi, Scale, Width, MinOffset,
- MaxOffset))
- return false;
-
- // We replace the STRX instruction, which stores 64 bits, with the STPW
- // instruction, which stores two consecutive 32-bit values. Therefore, we
- // compare the offset multiplied by two against the STPW offset range.
- int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
- if (Offset * 2 < MinOffset || Offset * 2 > MaxOffset)
- return false;
-
- Register BaseReg = getLdStRegOp(MI).getReg();
- unsigned Count = 0, UpperLoadIdx = 0;
- uint64_t Accumulated = 0, Mask = 0xFFFFUL;
- bool hasORR = false, Found = false;
- SmallVector<MachineBasicBlock::iterator> MIs;
- ModifiedRegUnits.clear();
- UsedRegUnits.clear();
- do {
- MBBI = prev_nodbg(MBBI, B);
- MachineInstr &MI = *MBBI;
- if (!MI.isTransient())
- ++Count;
- if (!isSymmetricLoadCandidate(MI, BaseReg)) {
- LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
- TRI);
- if (!ModifiedRegUnits.available(BaseReg) ||
- !UsedRegUnits.available(BaseReg))
- return false;
- continue;
- }
-
- unsigned Opc = MI.getOpcode();
- if (Opc == AArch64::ORRXrs) {
- hasORR = true;
- MIs.push_back(MBBI);
- continue;
- }
- unsigned ValueOrder = Opc == AArch64::MOVZXi ? 1 : 2;
- MachineOperand Value = MI.getOperand(ValueOrder);
- MachineOperand Shift = MI.getOperand(ValueOrder + 1);
- if (!Value.isImm() || !Shift.isImm())
- return false;
-
- uint64_t IValue = Value.getImm();
- uint64_t IShift = Shift.getImm();
- uint64_t Adder = IValue << IShift;
- MIs.push_back(MBBI);
- if (Adder >> 32)
- UpperLoadIdx = MIs.size();
-
- Accumulated -= Accumulated & (Mask << IShift);
- Accumulated += Adder;
- if (Accumulated != 0 &&
- (((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
- (hasORR && (Accumulated >> 32 == 0)))) {
- Found = true;
- break;
- }
- } while (MBBI != B && Count < Limit);
-
- if (Found) {
- I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
- return true;
- }
-
- return false;
-}
-
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -2910,27 +2753,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
}
- // We have an opportunity to optimize the `STRXui` instruction when it stores
- // a 64-bit value whose two 32-bit halves are identical. The `STPWi`
- // instruction allows us to materialize the 32-bit value only once and store
- // it twice.
- // Considering :
- // renamable $x8 = MOVZXi 49370, 0
- // renamable $x8 = MOVKXi $x8, 320, 16
- // renamable $x8 = ORRXrs $x8, $x8, 32
- // STRXui killed renamable $x8, killed renamable $x0, 0
- // Transform :
- // $w8 = MOVZWi 49370, 0
- // $w8 = MOVKWi $w8, 320, 16
- // STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MBBI != E;) {
- if (isMergeableLdStUpdate(*MBBI) &&
- tryFoldSymmetryConstantLoad(MBBI, UpdateLimit))
- Modified = true;
- else
- ++MBBI;
- }
-
return Modified;
}