diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 58 |
1 files changed, 21 insertions, 37 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5d31eed8fe7d..12915c734442 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1989,39 +1989,6 @@ bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const { return selectImpl(MI, *CoverageInfo); } -bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const { - Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID(); - if (TM.getOptLevel() > CodeGenOptLevel::None) { - unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second; - if (WGSize <= STI.getWavefrontSize()) { - // If the workgroup fits in a wave, remove s_barrier_signal and lower - // s_barrier/s_barrier_wait to wave_barrier. - if (IntrinsicID == Intrinsic::amdgcn_s_barrier || - IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) { - MachineBasicBlock *MBB = MI.getParent(); - const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER)); - } - MI.eraseFromParent(); - return true; - } - } - - if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) { - // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait - MachineBasicBlock *MBB = MI.getParent(); - const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM)) - .addImm(AMDGPU::Barrier::WORKGROUP); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT)) - .addImm(AMDGPU::Barrier::WORKGROUP); - MI.eraseFromParent(); - return true; - } - - return selectImpl(MI, *CoverageInfo); -} - static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail) { if (TexFailCtrl) @@ -2338,10 +2305,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( return selectDSAppendConsume(I, false); case Intrinsic::amdgcn_init_whole_wave: return selectInitWholeWave(I); - case Intrinsic::amdgcn_s_barrier: - case Intrinsic::amdgcn_s_barrier_signal: - case Intrinsic::amdgcn_s_barrier_wait: - return selectSBarrier(I); case Intrinsic::amdgcn_raw_buffer_load_lds: case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: case Intrinsic::amdgcn_struct_buffer_load_lds: @@ -5746,6 +5709,16 @@ AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const { } InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const { + const MachineInstr &I = *Root.getParent(); + + // We are assuming CPol is second from last operand of the intrinsic. + auto PassedCPol = + I.getOperand(I.getNumOperands() - 2).getImm() & ~AMDGPU::CPol::SCAL; + return selectGlobalSAddr(Root, PassedCPol); +} + +InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const { return selectGlobalSAddr(Root, AMDGPU::CPol::GLC); } @@ -5762,6 +5735,17 @@ AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset( } InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0( + MachineOperand &Root) const { + const MachineInstr &I = *Root.getParent(); + + // We are assuming CPol is second from last operand of the intrinsic. + auto PassedCPol = + I.getOperand(I.getNumOperands() - 2).getImm() & ~AMDGPU::CPol::SCAL; + return selectGlobalSAddr(Root, PassedCPol, false); +} + +InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { Register Addr = Root.getReg(); Register PtrBase; |
