diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d24c7da964ce..75fac09d0b99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1791,6 +1791,19 @@ bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
       return true;
     }
   }
+
+  // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
+  if (STI.hasSplitBarriers()) {
+    MachineBasicBlock *MBB = MI.getParent();
+    const DebugLoc &DL = MI.getDebugLoc();
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
+        .addImm(AMDGPU::Barrier::WORKGROUP);
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
+        .addImm(AMDGPU::Barrier::WORKGROUP);
+    MI.eraseFromParent();
+    return true;
+  }
+
   return selectImpl(MI, *CoverageInfo);
 }
 
@@ -2137,6 +2150,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     break;
   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
     return selectDSBvhStackIntrinsic(I);
+  case Intrinsic::amdgcn_s_barrier_init:
+  case Intrinsic::amdgcn_s_barrier_join:
+  case Intrinsic::amdgcn_s_wakeup_barrier:
+  case Intrinsic::amdgcn_s_get_barrier_state:
+    return selectNamedBarrierInst(I, IntrinsicID);
+  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
+    return selectSBarrierSignalIsfirst(I, IntrinsicID);
+  case Intrinsic::amdgcn_s_barrier_leave:
+    return selectSBarrierLeave(I);
   }
   return selectImpl(I, *CoverageInfo);
 }
@@ -5239,6 +5262,167 @@ AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
   }};
 }
 
+/// Select amdgcn_s_barrier_signal_isfirst and its _var variant.
+///
+/// For the _var form operand 2 is copied into M0 and
+/// S_BARRIER_SIGNAL_ISFIRST_M0 is emitted; otherwise operand 2 is encoded as
+/// the immediate of S_BARRIER_SIGNAL_ISFIRST_IMM. SCC is then copied into the
+/// register in operand 0 and \p I is erased.
+///
+/// \returns false if the M0 copy or the result register cannot be constrained.
+bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
+    MachineInstr &I, Intrinsic::ID IntrID) const {
+  MachineBasicBlock *MBB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register CCReg = I.getOperand(0).getReg();
+
+  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;
+
+  if (HasM0) {
+    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+                       .addReg(I.getOperand(2).getReg());
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
+    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
+      return false;
+  } else {
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
+        .addImm(I.getOperand(2).getImm());
+  }
+
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
+
+  I.eraseFromParent();
+  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
+                                      *MRI);
+}
+
+/// Map a named-barrier intrinsic ID to its machine opcode.
+///
+/// Returns the _IMM opcode variant when \p HasInlineConst is true and the _M0
+/// variant otherwise. Any other intrinsic ID is unreachable.
+static unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
+  if (HasInlineConst) {
+    switch (IntrID) {
+    default:
+      llvm_unreachable("not a named barrier op");
+    case Intrinsic::amdgcn_s_barrier_init:
+      return AMDGPU::S_BARRIER_INIT_IMM;
+    case Intrinsic::amdgcn_s_barrier_join:
+      return AMDGPU::S_BARRIER_JOIN_IMM;
+    case Intrinsic::amdgcn_s_wakeup_barrier:
+      return AMDGPU::S_WAKEUP_BARRIER_IMM;
+    case Intrinsic::amdgcn_s_get_barrier_state:
+      return AMDGPU::S_GET_BARRIER_STATE_IMM;
+    }
+  } else {
+    switch (IntrID) {
+    default:
+      llvm_unreachable("not a named barrier op");
+    case Intrinsic::amdgcn_s_barrier_init:
+      return AMDGPU::S_BARRIER_INIT_M0;
+    case Intrinsic::amdgcn_s_barrier_join:
+      return AMDGPU::S_BARRIER_JOIN_M0;
+    case Intrinsic::amdgcn_s_wakeup_barrier:
+      return AMDGPU::S_WAKEUP_BARRIER_M0;
+    case Intrinsic::amdgcn_s_get_barrier_state:
+      return AMDGPU::S_GET_BARRIER_STATE_M0;
+    }
+  }
+}
+
+/// Select the named-barrier intrinsics amdgcn_s_barrier_init,
+/// amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier and
+/// amdgcn_s_get_barrier_state.
+///
+/// If the barrier operand is a known constant it becomes the immediate of the
+/// _IMM opcode; otherwise its register is copied into M0 and the _M0 opcode is
+/// used. For amdgcn_s_barrier_init, M0 additionally carries the member count
+/// (operand 2) shifted left by 16.
+///
+/// \returns false if the copy into M0 cannot be constrained.
+bool AMDGPUInstructionSelector::selectNamedBarrierInst(
+    MachineInstr &I, Intrinsic::ID IntrID) const {
+  MachineBasicBlock *MBB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  // Bind by reference instead of copying the operand out of I; I is only
+  // erased after the last use of BarOp.
+  const MachineOperand &BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
+                                    ? I.getOperand(2)
+                                    : I.getOperand(1);
+  std::optional<int64_t> BarValImm =
+      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
+  Register M0Val;
+  Register TmpReg0;
+
+  // For S_BARRIER_INIT, member count will always be read from M0[16:22]
+  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+    Register MemberCount = I.getOperand(2).getReg();
+    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    // TODO: This should be expanded during legalization so that the S_LSHL
+    // and S_OR can be constant-folded
+    // S_LSHL_B32 shifts its first source by its second, so the member count
+    // goes first and the shift amount of 16 second.
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
+        .addReg(MemberCount)
+        .addImm(16);
+    M0Val = TmpReg0;
+  }
+
+  // If not inlinable, get reference to barrier depending on the instruction
+  if (!BarValImm) {
+    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+      // If reference to barrier id is not an inlinable constant then it must be
+      // referenced with M0[4:0]. Perform an OR with the member count to include
+      // it in M0 for S_BARRIER_INIT.
+      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
+          .addReg(BarOp.getReg())
+          .addReg(TmpReg0);
+      M0Val = TmpReg1;
+    } else {
+      M0Val = BarOp.getReg();
+    }
+  }
+
+  // Build copy to M0 if needed. For S_BARRIER_INIT, M0 is always required.
+  if (M0Val) {
+    auto CopyMIB =
+        BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
+    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
+      return false;
+  }
+
+  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
+  MachineInstrBuilder MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
+
+  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
+    MIB.addDef(I.getOperand(0).getReg());
+
+  if (BarValImm)
+    MIB.addImm(*BarValImm);
+
+  I.eraseFromParent();
+  return true;
+}
+
+/// Select the amdgcn_s_barrier_leave intrinsic.
+///
+/// Emits S_BARRIER_LEAVE, copies SCC into the register in operand 0 and erases
+/// \p I.
+///
+/// \returns false if the result register cannot be constrained.
+bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register CCReg = I.getOperand(0).getReg();
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
+
+  I.eraseFromParent();
+  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
+                                      *MRI);
+}
+
 void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
