diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 75 |
1 file changed, 63 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index ae537b194f50..7553c370f694 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -352,6 +352,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { return 1; case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec: case AMDGPU::S_LOAD_DWORDX2_IMM: case AMDGPU::S_LOAD_DWORDX2_IMM_ec: case AMDGPU::GLOBAL_LOAD_DWORDX2: @@ -363,6 +365,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { return 2; case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec: case AMDGPU::S_LOAD_DWORDX3_IMM: case AMDGPU::S_LOAD_DWORDX3_IMM_ec: case AMDGPU::GLOBAL_LOAD_DWORDX3: @@ -374,6 +378,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { return 3; case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec: case AMDGPU::S_LOAD_DWORDX4_IMM: case AMDGPU::S_LOAD_DWORDX4_IMM_ec: case AMDGPU::GLOBAL_LOAD_DWORDX4: @@ -385,6 +391,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { return 4; case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec: case AMDGPU::S_LOAD_DWORDX8_IMM: case AMDGPU::S_LOAD_DWORDX8_IMM_ec: return 8; @@ -499,12 +507,20 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case 
AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec: return S_BUFFER_LOAD_IMM; case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec: return S_BUFFER_LOAD_SGPR_IMM; case AMDGPU::S_LOAD_DWORD_IMM: case AMDGPU::S_LOAD_DWORDX2_IMM: @@ -587,12 +603,20 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec: return AMDGPU::S_BUFFER_LOAD_DWORD_IMM; case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec: return AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM; case AMDGPU::S_LOAD_DWORD_IMM: case AMDGPU::S_LOAD_DWORDX2_IMM: @@ -703,6 +727,10 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec: + 
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec: Result.SOffset = true; [[fallthrough]]; case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: @@ -710,6 +738,10 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec: + case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec: case AMDGPU::S_LOAD_DWORD_IMM: case AMDGPU::S_LOAD_DWORDX2_IMM: case AMDGPU::S_LOAD_DWORDX3_IMM: @@ -1679,6 +1711,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( return New; } +static bool needsConstraintedOpcode(const GCNSubtarget &STM, + const MachineMemOperand *MMO, + unsigned Width) { + return STM.isXNACKEnabled() && MMO->getAlign().value() < Width * 4; +} + unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired) { const unsigned Width = CI.Width + Paired.Width; @@ -1696,38 +1734,51 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, case UNKNOWN: llvm_unreachable("Unknown instruction class"); - case S_BUFFER_LOAD_IMM: + case S_BUFFER_LOAD_IMM: { + const MachineMemOperand *MMO = *CI.I->memoperands_begin(); + bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width); switch (Width) { default: return 0; case 2: - return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM; case 3: - return AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM; case 4: - return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; + return NeedsConstrainedOpc ? 
AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; case 8: - return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM; } - case S_BUFFER_LOAD_SGPR_IMM: + } + case S_BUFFER_LOAD_SGPR_IMM: { + const MachineMemOperand *MMO = *CI.I->memoperands_begin(); + bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width); switch (Width) { default: return 0; case 2: - return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM; case 3: - return AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM; case 4: - return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM; case 8: - return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM; + return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec + : AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM; } + } case S_LOAD_IMM: { // If XNACK is enabled, use the constrained opcodes when the first load is // under-aligned. const MachineMemOperand *MMO = *CI.I->memoperands_begin(); - bool NeedsConstrainedOpc = - STM->isXNACKEnabled() && MMO->getAlign().value() < Width * 4; + bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width); switch (Width) { default: return 0; |
