summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
diff options
context:
space:
mode:
author Mingming Liu <mingmingl@google.com> 2025-09-10 15:25:31 -0700
committer GitHub <noreply@github.com> 2025-09-10 15:25:31 -0700
commit 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
parent 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parent b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp 24
1 files changed, 20 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 9b348d46fec4..ce25bf499c41 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1132,9 +1132,18 @@ void SIFrameLowering::emitCSRSpillRestores(
RestoreWWMRegisters(WWMCalleeSavedRegs);
// The original EXEC is the first operand of the return instruction.
- const MachineInstr &Return = MBB.instr_back();
- assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
- "Unexpected return inst");
+ MachineInstr &Return = MBB.instr_back();
+ unsigned Opcode = Return.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
+ Opcode = AMDGPU::SI_RETURN;
+ break;
+ case AMDGPU::SI_TCRETURN_GFX_WholeWave:
+ Opcode = AMDGPU::SI_TCRETURN_GFX;
+ break;
+ default:
+ llvm_unreachable("Unexpected return inst");
+ }
Register OrigExec = Return.getOperand(0).getReg();
if (!WWMScratchRegs.empty()) {
@@ -1148,6 +1157,11 @@ void SIFrameLowering::emitCSRSpillRestores(
// Restore original EXEC.
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
+
+ // Drop the first operand and update the opcode.
+ Return.removeOperand(0);
+ Return.setDesc(TII->get(Opcode));
+
return;
}
@@ -1728,7 +1742,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
"Whole wave functions can use the reg mapped for their i1 argument");
// FIXME: Be more efficient!
- for (MCRegister Reg : AMDGPU::VGPR_32RegClass)
+ unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
+ for (MCRegister Reg :
+ AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
if (MF.getRegInfo().isPhysRegModified(Reg)) {
MFI->reserveWWMRegister(Reg);
MF.begin()->addLiveIn(Reg);