summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
author Mingming Liu <mingmingl@google.com> 2025-09-10 15:25:31 -0700
committer GitHub <noreply@github.com> 2025-09-10 15:25:31 -0700
commit 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
parent 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parent b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp11
1 files changed, 11 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index dce4e6f99300..6533d4c8eca3 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -627,6 +627,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
+ // Instructions to re-legalize after changing register classes.
+ SmallVector<MachineInstr *, 8> Relegalize;
+
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
++I) {
@@ -634,6 +637,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
+ // scale_src has a register class restricted to the low 256 VGPRs; changing
+ // registers to VGPR may not take that restriction into account.
+ if (TII->isWMMA(MI) &&
+ AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::scale_src0))
+ Relegalize.push_back(&MI);
continue;
case AMDGPU::COPY: {
const TargetRegisterClass *SrcRC, *DstRC;
@@ -791,6 +799,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
for (auto *MI : PHINodes) {
processPHINode(*MI);
}
+ while (!Relegalize.empty())
+ TII->legalizeOperands(*Relegalize.pop_back_val(), MDT);
+
if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);