diff options
| author | Vitaly Buka <vitalybuka@google.com> | 2024-09-10 09:51:43 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-10 09:51:43 -0700 |
| commit | c7a7767fca736d0447832ea4d4587fb3b9e797c2 (patch) | |
| tree | 4c8e6b67195311e57694ab8297357cd3c312f623 /llvm/lib/Target/AMDGPU | |
| parent | 14b43563d1618c71385f3ac15a475adf9d0ef5f9 (diff) | |
Revert "[amdgpu] Add llvm.amdgcn.init.whole.wave intrinsic" (#108054)
Breaks bots, see #105822.
Reverts llvm/llvm-project#105822
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 26 |
11 files changed, 5 insertions, 72 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 380dc7d3312f..0daaf6b65760 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2738,11 +2738,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { case Intrinsic::amdgcn_ds_bvh_stack_rtn: SelectDSBvhStackIntrinsic(N); return; - case Intrinsic::amdgcn_init_whole_wave: - CurDAG->getMachineFunction() - .getInfo<SIMachineFunctionInfo>() - ->setInitWholeWave(); - break; } SelectCode(N); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 53085d423cef..4dfd3f087c1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1772,14 +1772,6 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI, return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } -bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const { - MachineFunction *MF = MI.getParent()->getParent(); - SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>(); - - MFInfo->setInitWholeWave(); - return selectImpl(MI, *CoverageInfo); -} - bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const { if (TM.getOptLevel() > CodeGenOptLevel::None) { unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second; @@ -2107,8 +2099,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( return selectDSAppendConsume(I, true); case Intrinsic::amdgcn_ds_consume: return selectDSAppendConsume(I, false); - case Intrinsic::amdgcn_init_whole_wave: - return selectInitWholeWave(I); case Intrinsic::amdgcn_s_barrier: return selectSBarrier(I); case Intrinsic::amdgcn_raw_buffer_load_lds: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index df39ecbd61bc..068db5c1c144 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -120,7 +120,6 @@ private: bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const; - bool selectInitWholeWave(MachineInstr &MI) const; bool selectSBarrier(MachineInstr &MI) const; bool selectDSBvhStackIntrinsic(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index b1022e48b8d3..7efb7f825348 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -67,8 +67,6 @@ protected: // Kernel may need limited waves per EU for better performance. bool WaveLimiter = false; - bool HasInitWholeWave = false; - public: AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); @@ -111,9 +109,6 @@ public: return WaveLimiter; } - bool hasInitWholeWave() const { return HasInitWholeWave; } - void setInitWholeWave() { HasInitWholeWave = true; } - unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { return allocateLDSGlobal(DL, GV, DynLDSAlign); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index f2c9619cb827..46d98cad963b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4997,7 +4997,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize); break; } - case Intrinsic::amdgcn_init_whole_wave: case Intrinsic::amdgcn_live_mask: { OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 2cd5fb2b9428..95c4859674ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -329,7 +329,6 @@ def : SourceOfDivergence<int_amdgcn_mov_dpp>; def : SourceOfDivergence<int_amdgcn_mov_dpp8>; def : SourceOfDivergence<int_amdgcn_update_dpp>; def : SourceOfDivergence<int_amdgcn_writelane>; -def : SourceOfDivergence<int_amdgcn_init_whole_wave>; foreach intr = AMDGPUMFMAIntrinsics908 in def : SourceOfDivergence<intr>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7f659578a6d2..9c9c50513937 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1739,9 +1739,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo( ? DenormalMode::IEEE : DenormalMode::PreserveSign; - if (YamlMFI.HasInitWholeWave) - MFI->setInitWholeWave(); - return false; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index dfdc7ad32b00..8c951105101d 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1343,14 +1343,10 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // Allocate spill slots for WWM reserved VGPRs. // For chain functions, we only need to do this if we have calls to - // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since - // chain functions do not return) and the function did not contain a call to - // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes - // when entering the function). - bool IsChainWithoutRestores = - FuncInfo->isChainFunction() && - (!MF.getFrameInfo().hasTailCall() || FuncInfo->hasInitWholeWave()); - if (!FuncInfo->isEntryFunction() && !IsChainWithoutRestores) { + // llvm.amdgcn.cs.chain. + bool IsChainWithoutCalls = + FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall(); + if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) { for (Register Reg : FuncInfo->getWWMReservedRegs()) { const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index f3eee9c807c1..b7543238c130 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -583,16 +583,6 @@ def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI < let Defs = [EXEC]; } -// Sets EXEC to all lanes and returns the previous EXEC. -def SI_INIT_WHOLE_WAVE : SPseudoInstSI < - (outs SReg_1:$dst), (ins), - [(set i1:$dst, (int_amdgcn_init_whole_wave))]> { - let Defs = [EXEC]; - let Uses = [EXEC]; - - let isConvergent = 1; -} - // Return for returning shaders to a shader variant epilog. def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7cebfa29fe7b..7af5e7388f84 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -289,8 +289,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { StringValue SGPRForEXECCopy; StringValue LongBranchReservedReg; - bool HasInitWholeWave = false; - SIMachineFunctionInfo() = default; SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, const TargetRegisterInfo &TRI, @@ -338,7 +336,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { StringValue()); // Don't print out when it's empty. YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg, StringValue()); - YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false); } }; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 7a493d8d1d1d..f9d7ead4ff3e 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -594,8 +594,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, KillInstrs.push_back(&MI); BBI.NeedsLowering = true; } else if (Opcode == AMDGPU::SI_INIT_EXEC || - Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT || - Opcode == AMDGPU::SI_INIT_WHOLE_WAVE) { + Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT) { InitExecInstrs.push_back(&MI); } else if (WQMOutputs) { // The function is in machine SSA form, which means that physical @@ -1583,29 +1582,6 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) { MachineBasicBlock *MBB = MI.getParent(); bool IsWave32 = ST->isWave32(); - if (MI.getOpcode() == AMDGPU::SI_INIT_WHOLE_WAVE) { - assert(MBB == &MBB->getParent()->front() && - "init whole wave not in entry block"); - Register EntryExec = MRI->createVirtualRegister(TRI->getBoolRC()); - MachineInstr *SaveExec = - BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(), - TII->get(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 - : AMDGPU::S_OR_SAVEEXEC_B64), - EntryExec) - .addImm(-1); - - // Replace all uses of MI's destination reg with EntryExec. - MRI->replaceRegWith(MI.getOperand(0).getReg(), EntryExec); - MI.eraseFromParent(); - - if (LIS) { - LIS->RemoveMachineInstrFromMaps(MI); - LIS->InsertMachineInstrInMaps(*SaveExec); - LIS->createAndComputeVirtRegInterval(EntryExec); - } - return; - } - if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) { // This should be before all vector instructions. MachineInstr *InitMI = |
