summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2025-11-14 09:18:43 -0800
committer: GitHub <noreply@github.com> 2025-11-14 17:18:43 +0000
commit: cfc74dddeffd3e53c7fdb90593db01a01cffda8f (patch)
tree: fb87c5c1fb7031df500f76537bf5a57342a0d0a9 /llvm/lib/Target/AMDGPU
parent: 112258138d586680fdd69650f383536c53861cc9 (diff)
AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)
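Fixes another verifier error after introducing AV registers. Also fixes a bug where the copy's source operand kept its subregister index, if it had one, after being rewritten to use the readfirstlane result; the index is now cleared.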
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 22
1 file changed, 18 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 1e3562b37d87..e1647b76702c 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -902,14 +902,28 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
// really much we can do to fix this.
// Some special instructions use M0 as an input. Some even only use
// the first lane. Insert a readfirstlane and hope for the best.
- if (DstReg == AMDGPU::M0 &&
- TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
Register TmpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+
+ const MCInstrDesc &ReadFirstLaneDesc =
+ TII->get(AMDGPU::V_READFIRSTLANE_B32);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
.add(MI.getOperand(1));
+
+ unsigned SubReg = MI.getOperand(1).getSubReg();
MI.getOperand(1).setReg(TmpReg);
+ MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
+
+ const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
+ const TargetRegisterClass *ConstrainRC =
+ SubReg == AMDGPU::NoSubRegister
+ ? OpRC
+ : TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
+
+ if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
+ llvm_unreachable("failed to constrain register");
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
MI, MI.getDebugLoc())) {
I = std::next(I);