diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index dce4e6f99300..6533d4c8eca3 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -627,6 +627,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) { TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); + // Instructions to re-legalize after changing register classes + SmallVector<MachineInstr *, 8> Relegalize; + for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { @@ -634,6 +637,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) { switch (MI.getOpcode()) { default: + // scale_src has a register class restricted to low 256 VGPRs, changing + // registers to VGPR may not take it into acount. + if (TII->isWMMA(MI) && + AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::scale_src0)) + Relegalize.push_back(&MI); continue; case AMDGPU::COPY: { const TargetRegisterClass *SrcRC, *DstRC; @@ -791,6 +799,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) { for (auto *MI : PHINodes) { processPHINode(*MI); } + while (!Relegalize.empty()) + TII->legalizeOperands(*Relegalize.pop_back_val(), MDT); + if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None && EnableM0Merge) hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII); |
