summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 39
1 files changed, 18 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index ae3f2b87f353..a3cb3b3f47e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
-bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
- MachineInstr &MI) const {
- Register Dst = MI.getOperand(0).getReg();
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock *MBB = MI.getParent();
-
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
- .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
- MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
@@ -3620,8 +3603,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
case TargetOpcode::G_ATOMICRMW_FADD:
- case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
- case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
+ case TargetOpcode::G_ATOMICRMW_FMIN:
+ case TargetOpcode::G_ATOMICRMW_FMAX:
return selectG_LOAD_STORE_ATOMICRMW(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
@@ -4216,10 +4199,11 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
return false;
const GEPInfo &GEPI = AddrInfo[0];
- std::optional<int64_t> EncodedImm =
- AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);
+ std::optional<int64_t> EncodedImm;
if (SOffset && Offset) {
+ EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
+ /*HasSOffset=*/true);
if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
AddrInfo.size() > 1) {
const GEPInfo &GEPI2 = AddrInfo[1];
@@ -4229,6 +4213,17 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
Base = GEPI2.SgprParts[0];
*SOffset = OffsetReg;
*Offset = *EncodedImm;
+ if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
+ return true;
+
+ // For unbuffered smem loads, it is illegal for the Immediate Offset
+ // to be negative if the resulting (Offset + (M0 or SOffset or zero))
+ // is negative. Handle the case where the Immediate Offset + SOffset
+ // is negative.
+ auto SKnown = KB->getKnownBits(*SOffset);
+ if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
+ return false;
+
return true;
}
}
@@ -4236,6 +4231,8 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
return false;
}
+ EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
+ /*HasSOffset=*/false);
if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
Base = GEPI.SgprParts[0];
*Offset = *EncodedImm;