summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp152
1 files changed, 152 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d24c7da964ce..75fac09d0b99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1791,6 +1791,19 @@ bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
return true;
}
}
+
+ // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
+ if (STI.hasSplitBarriers()) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
+ .addImm(AMDGPU::Barrier::WORKGROUP);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
+ .addImm(AMDGPU::Barrier::WORKGROUP);
+ MI.eraseFromParent();
+ return true;
+ }
+
return selectImpl(MI, *CoverageInfo);
}
@@ -2137,6 +2150,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
break;
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
return selectDSBvhStackIntrinsic(I);
+ case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_wakeup_barrier:
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ return selectNamedBarrierInst(I, IntrinsicID);
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
+ return selectSBarrierSignalIsfirst(I, IntrinsicID);
+ case Intrinsic::amdgcn_s_barrier_leave:
+ return selectSBarrierLeave(I);
}
return selectImpl(I, *CoverageInfo);
}
@@ -5239,6 +5262,135 @@ AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
}};
}
+/// Select the s_barrier_signal_isfirst / s_barrier_signal_isfirst_var
+/// intrinsics.
+///
+/// The immediate form takes the barrier from operand 2 as an immediate; the
+/// "_var" form takes it from a register, which is copied into M0 ahead of the
+/// M0 flavour of the instruction. In both cases SCC is copied into the
+/// intrinsic's result register (operand 0) afterwards.
+///
+/// \param I      the generic intrinsic instruction; erased once the
+///               replacement sequence has been emitted.
+/// \param IntrID distinguishes the "_var" (M0) form from the immediate form.
+/// \returns false if the M0 copy or the result register cannot be
+///          constrained, true otherwise.
+bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
+    MachineInstr &I, Intrinsic::ID IntrID) const {
+  MachineBasicBlock &Block = *I.getParent();
+  const DebugLoc &Loc = I.getDebugLoc();
+  const Register ResultReg = I.getOperand(0).getReg();
+  const MachineOperand &BarrierOp = I.getOperand(2);
+
+  if (IntrID != Intrinsic::amdgcn_s_barrier_signal_isfirst_var) {
+    // Immediate form: the barrier is encoded directly in the instruction.
+    BuildMI(Block, &I, Loc, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
+        .addImm(BarrierOp.getImm());
+  } else {
+    // Variable form: route the barrier register through M0.
+    MachineInstrBuilder M0Copy =
+        BuildMI(Block, &I, Loc, TII.get(AMDGPU::COPY), AMDGPU::M0)
+            .addReg(BarrierOp.getReg());
+    BuildMI(Block, &I, Loc, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
+    if (!constrainSelectedInstRegOperands(*M0Copy, TII, TRI, RBI))
+      return false;
+  }
+
+  // Read the result back out of SCC into the intrinsic's def.
+  BuildMI(Block, &I, Loc, TII.get(AMDGPU::COPY), ResultReg)
+      .addReg(AMDGPU::SCC);
+  I.eraseFromParent();
+
+  const bool Constrained = RBI.constrainGenericRegister(
+      ResultReg, AMDGPU::SReg_32_XM0_XEXECRegClass, *MRI);
+  return Constrained;
+}
+
+/// Map a named-barrier intrinsic to the machine opcode that implements it.
+///
+/// \param HasInlineConst true to pick the *_IMM opcode, false to pick the
+///                       *_M0 opcode.
+/// \param IntrID         one of amdgcn_s_barrier_init, amdgcn_s_barrier_join,
+///                       amdgcn_s_wakeup_barrier or amdgcn_s_get_barrier_state;
+///                       any other value hits llvm_unreachable.
+/// \returns the selected AMDGPU opcode.
+unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
+  switch (IntrID) {
+  case Intrinsic::amdgcn_s_barrier_init:
+    return HasInlineConst ? AMDGPU::S_BARRIER_INIT_IMM
+                          : AMDGPU::S_BARRIER_INIT_M0;
+  case Intrinsic::amdgcn_s_barrier_join:
+    return HasInlineConst ? AMDGPU::S_BARRIER_JOIN_IMM
+                          : AMDGPU::S_BARRIER_JOIN_M0;
+  case Intrinsic::amdgcn_s_wakeup_barrier:
+    return HasInlineConst ? AMDGPU::S_WAKEUP_BARRIER_IMM
+                          : AMDGPU::S_WAKEUP_BARRIER_M0;
+  case Intrinsic::amdgcn_s_get_barrier_state:
+    return HasInlineConst ? AMDGPU::S_GET_BARRIER_STATE_IMM
+                          : AMDGPU::S_GET_BARRIER_STATE_M0;
+  default:
+    llvm_unreachable("not a named barrier op");
+  }
+}
+
+/// Select the named-barrier intrinsics s_barrier_init, s_barrier_join,
+/// s_wakeup_barrier and s_get_barrier_state.
+///
+/// When the barrier operand is a known integer constant the *_IMM opcode is
+/// used and the constant becomes an immediate operand; otherwise the *_M0
+/// opcode is used and the barrier register is copied into M0 first. For
+/// s_barrier_init the shifted member count is always written to M0 as well.
+///
+/// \param I      the generic intrinsic instruction; erased on success.
+/// \param IntrID which of the four named-barrier intrinsics \p I is.
+/// \returns true on success, false if the copy into M0 cannot be constrained.
+bool AMDGPUInstructionSelector::selectNamedBarrierInst(
+    MachineInstr &I, Intrinsic::ID IntrID) const {
+  MachineBasicBlock *MBB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  // s_get_barrier_state has its result def in operand 0 (added to the selected
+  // instruction below) and its barrier in operand 2; the other intrinsics
+  // carry the barrier in operand 1.
+  const MachineOperand &BarOp =
+      IntrID == Intrinsic::amdgcn_s_get_barrier_state ? I.getOperand(2)
+                                                      : I.getOperand(1);
+  std::optional<int64_t> BarValImm =
+      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
+  Register M0Val;
+  Register TmpReg0;
+
+  // For S_BARRIER_INIT, member count will always be read from M0[16:22]
+  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+    Register MemberCount = I.getOperand(2).getReg();
+    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    // S_LSHL_B32 computes src0 << src1, so the member count is the first
+    // source and the shift amount (16) the second.
+    // TODO: This should be expanded during legalization so that the S_LSHL
+    // and S_OR can be constant-folded
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
+        .addReg(MemberCount)
+        .addImm(16);
+    M0Val = TmpReg0;
+  }
+
+  // If not inlinable, get reference to barrier depending on the instruction
+  if (!BarValImm) {
+    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
+      // If reference to barrier id is not an inlinable constant then it must be
+      // referenced with M0[4:0]. Perform an OR with the member count to include
+      // it in M0 for S_BARRIER_INIT.
+      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
+          .addReg(BarOp.getReg())
+          .addReg(TmpReg0);
+      M0Val = TmpReg1;
+    } else {
+      M0Val = BarOp.getReg();
+    }
+  }
+
+  // Build copy to M0 if needed. For S_BARRIER_INIT, M0 is always required.
+  if (M0Val) {
+    auto CopyMIB =
+        BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
+    // Report failure instead of silently continuing with an unconstrained
+    // copy; this mirrors selectSBarrierSignalIsfirst.
+    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
+      return false;
+  }
+
+  const unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
+  MachineInstrBuilder MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
+
+  // Only s_get_barrier_state produces a value.
+  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
+    MIB.addDef(I.getOperand(0).getReg());
+
+  if (BarValImm)
+    MIB.addImm(*BarValImm);
+
+  I.eraseFromParent();
+  return true;
+}
+/// Select the s_barrier_leave intrinsic.
+///
+/// Emits S_BARRIER_LEAVE followed by a copy of SCC into the intrinsic's result
+/// register (operand 0), then erases \p I.
+///
+/// \returns false if the result register cannot be constrained to
+///          SReg_32_XM0_XEXEC, true otherwise.
+bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
+  MachineBasicBlock &Block = *I.getParent();
+  const DebugLoc &Loc = I.getDebugLoc();
+  const Register ResultReg = I.getOperand(0).getReg();
+
+  BuildMI(Block, &I, Loc, TII.get(AMDGPU::S_BARRIER_LEAVE));
+  // Read the result back out of SCC into the intrinsic's def.
+  BuildMI(Block, &I, Loc, TII.get(AMDGPU::COPY), ResultReg)
+      .addReg(AMDGPU::SCC);
+  I.eraseFromParent();
+
+  const bool Constrained = RBI.constrainGenericRegister(
+      ResultReg, AMDGPU::SReg_32_XM0_XEXECRegClass, *MRI);
+  return Constrained;
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {