summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp58
1 files changed, 21 insertions, 37 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5d31eed8fe7d..12915c734442 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1989,39 +1989,6 @@ bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
return selectImpl(MI, *CoverageInfo);
}
-bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
- Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
- if (TM.getOptLevel() > CodeGenOptLevel::None) {
- unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
- if (WGSize <= STI.getWavefrontSize()) {
- // If the workgroup fits in a wave, remove s_barrier_signal and lower
- // s_barrier/s_barrier_wait to wave_barrier.
- if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
- IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
- MachineBasicBlock *MBB = MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
- BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
- }
- MI.eraseFromParent();
- return true;
- }
- }
-
- if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
- // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
- MachineBasicBlock *MBB = MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
- BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
- .addImm(AMDGPU::Barrier::WORKGROUP);
- BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
- .addImm(AMDGPU::Barrier::WORKGROUP);
- MI.eraseFromParent();
- return true;
- }
-
- return selectImpl(MI, *CoverageInfo);
-}
-
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
bool &IsTexFail) {
if (TexFailCtrl)
@@ -2338,10 +2305,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectDSAppendConsume(I, false);
case Intrinsic::amdgcn_init_whole_wave:
return selectInitWholeWave(I);
- case Intrinsic::amdgcn_s_barrier:
- case Intrinsic::amdgcn_s_barrier_signal:
- case Intrinsic::amdgcn_s_barrier_wait:
- return selectSBarrier(I);
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds:
@@ -5746,6 +5709,16 @@ AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
+ const MachineInstr &I = *Root.getParent();
+
+ // We are assuming CPol is second from last operand of the intrinsic.
+ auto PassedCPol =
+ I.getOperand(I.getNumOperands() - 2).getImm() & ~AMDGPU::CPol::SCAL;
+ return selectGlobalSAddr(Root, PassedCPol);
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
return selectGlobalSAddr(Root, AMDGPU::CPol::GLC);
}
@@ -5762,6 +5735,17 @@ AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
+ MachineOperand &Root) const {
+ const MachineInstr &I = *Root.getParent();
+
+ // We are assuming CPol is second from last operand of the intrinsic.
+ auto PassedCPol =
+ I.getOperand(I.getNumOperands() - 2).getImm() & ~AMDGPU::CPol::SCAL;
+ return selectGlobalSAddr(Root, PassedCPol, false);
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
Register Addr = Root.getReg();
Register PtrBase;