diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 24 |
1 files changed, 20 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 9b348d46fec4..ce25bf499c41 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1132,9 +1132,18 @@ void SIFrameLowering::emitCSRSpillRestores(
     RestoreWWMRegisters(WWMCalleeSavedRegs);
 
     // The original EXEC is the first operand of the return instruction.
-    const MachineInstr &Return = MBB.instr_back();
-    assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
-           "Unexpected return inst");
+    MachineInstr &Return = MBB.instr_back();
+    unsigned Opcode = Return.getOpcode();
+    switch (Opcode) {
+    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
+      Opcode = AMDGPU::SI_RETURN;
+      break;
+    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
+      Opcode = AMDGPU::SI_TCRETURN_GFX;
+      break;
+    default:
+      llvm_unreachable("Unexpected return inst");
+    }
     Register OrigExec = Return.getOperand(0).getReg();
 
     if (!WWMScratchRegs.empty()) {
@@ -1148,6 +1157,11 @@ void SIFrameLowering::emitCSRSpillRestores(
     // Restore original EXEC.
     unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
+
+    // Drop the first operand and update the opcode.
+    Return.removeOperand(0);
+    Return.setDesc(TII->get(Opcode));
+
     return;
   }
 
@@ -1728,7 +1742,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
            "Whole wave functions can use the reg mapped for their i1 argument");
 
     // FIXME: Be more efficient!
-    for (MCRegister Reg : AMDGPU::VGPR_32RegClass)
+    unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
+    for (MCRegister Reg :
+         AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
       if (MF.getRegInfo().isPhysRegModified(Reg)) {
         MFI->reserveWWMRegister(Reg);
         MF.begin()->addLiveIn(Reg);
```
