diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 39 |
1 files changed, 18 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index ae3f2b87f353..a3cb3b3f47e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic( return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } -bool AMDGPUInstructionSelector::selectPOPSExitingWaveID( - MachineInstr &MI) const { - Register Dst = MI.getOperand(0).getReg(); - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock *MBB = MI.getParent(); - - // TODO: Select this with a tablegen pattern. This is tricky because the - // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked - // mayLoad/mayStore and tablegen complains about the mismatch. - auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst) - .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID); - MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); -} - bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID(); @@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( return selectSBarrierSignalIsfirst(I, IntrinsicID); case Intrinsic::amdgcn_s_barrier_leave: return selectSBarrierLeave(I); - case Intrinsic::amdgcn_pops_exiting_wave_id: - return selectPOPSExitingWaveID(I); } return selectImpl(I, *CoverageInfo); } @@ -3620,8 +3603,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_ATOMICRMW_UINC_WRAP: case TargetOpcode::G_ATOMICRMW_UDEC_WRAP: case TargetOpcode::G_ATOMICRMW_FADD: - case AMDGPU::G_AMDGPU_ATOMIC_FMIN: - case AMDGPU::G_AMDGPU_ATOMIC_FMAX: + case TargetOpcode::G_ATOMICRMW_FMIN: + case TargetOpcode::G_ATOMICRMW_FMAX: return selectG_LOAD_STORE_ATOMICRMW(I); case TargetOpcode::G_SELECT: return 
selectG_SELECT(I); @@ -4216,10 +4199,11 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, return false; const GEPInfo &GEPI = AddrInfo[0]; - std::optional<int64_t> EncodedImm = - AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false); + std::optional<int64_t> EncodedImm; if (SOffset && Offset) { + EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, + /*HasSOffset=*/true); if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm && AddrInfo.size() > 1) { const GEPInfo &GEPI2 = AddrInfo[1]; @@ -4229,6 +4213,17 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, Base = GEPI2.SgprParts[0]; *SOffset = OffsetReg; *Offset = *EncodedImm; + if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI)) + return true; + + // For unbuffered smem loads, it is illegal for the Immediate Offset + // to be negative if the resulting (Offset + (M0 or SOffset or zero)) + // is negative. Handle the case where the Immediate Offset + SOffset + // is negative. + auto SKnown = KB->getKnownBits(*SOffset); + if (*Offset + SKnown.getMinValue().getSExtValue() < 0) + return false; + return true; } } @@ -4236,6 +4231,8 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, return false; } + EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, + /*HasSOffset=*/false); if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { Base = GEPI.SgprParts[0]; *Offset = *EncodedImm; |
