diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 86 |
1 files changed, 65 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 1334029544f9..701084844cd9 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -15,19 +15,13 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/BranchProbability.h" using namespace llvm; #define DEBUG_TYPE "si-pre-emit-peephole" -static unsigned SkipThreshold; - -static cl::opt<unsigned, true> SkipThresholdFlag( - "amdgpu-skip-threshold", cl::Hidden, - cl::desc( - "Number of instructions before jumping over divergent control flow"), - cl::location(SkipThreshold), cl::init(12)); - namespace { class SIPreEmitPeephole : public MachineFunctionPass { @@ -41,7 +35,8 @@ private: MachineBasicBlock *&TrueMBB, MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond); - bool mustRetainExeczBranch(const MachineBasicBlock &From, + bool mustRetainExeczBranch(const MachineInstr &Branch, + const MachineBasicBlock &From, const MachineBasicBlock &To) const; bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); @@ -304,11 +299,58 @@ bool SIPreEmitPeephole::getBlockDestinations( return true; } +namespace { +class BranchWeightCostModel { + const SIInstrInfo &TII; + const TargetSchedModel &SchedModel; + BranchProbability BranchProb; + static constexpr uint64_t BranchNotTakenCost = 1; + uint64_t BranchTakenCost; + uint64_t ThenCyclesCost = 0; + +public: + BranchWeightCostModel(const SIInstrInfo &TII, const MachineInstr &Branch, + const MachineBasicBlock &Succ) + : TII(TII), SchedModel(TII.getSchedModel()) { + const MachineBasicBlock &Head = *Branch.getParent(); + const auto *FromIt = find(Head.successors(), &Succ); + assert(FromIt != Head.succ_end()); + + BranchProb = Head.getSuccProbability(FromIt); + if (BranchProb.isUnknown()) + BranchProb = 
BranchProbability::getZero(); + BranchTakenCost = SchedModel.computeInstrLatency(&Branch); + } + + bool isProfitable(const MachineInstr &MI) { + if (TII.isWaitcnt(MI.getOpcode())) + return false; + + ThenCyclesCost += SchedModel.computeInstrLatency(&MI); + + // Consider `P = N/D` to be the probability of execz being false (executing + // the then-block). The transformation is profitable if always executing the + // 'then' block is cheaper than executing sometimes 'then' and always + // executing s_cbranch_execz: + // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost + // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost + // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D * + // BranchNotTakenCost + uint64_t Numerator = BranchProb.getNumerator(); + uint64_t Denominator = BranchProb.getDenominator(); + return (Denominator - Numerator) * ThenCyclesCost <= + ((Denominator - Numerator) * BranchTakenCost + + Numerator * BranchNotTakenCost); + } +}; + bool SIPreEmitPeephole::mustRetainExeczBranch( - const MachineBasicBlock &From, const MachineBasicBlock &To) const { - unsigned NumInstr = 0; - const MachineFunction *MF = From.getParent(); + const MachineInstr &Branch, const MachineBasicBlock &From, + const MachineBasicBlock &To) const { + assert(is_contained(Branch.getParent()->successors(), &From)); + BranchWeightCostModel CostModel{*TII, Branch, From}; + const MachineFunction *MF = From.getParent(); for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); MBBI != End && MBBI != ToI; ++MBBI) { const MachineBasicBlock &MBB = *MBBI; @@ -326,23 +368,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch( if (TII->hasUnwantedEffectsWhenEXECEmpty(MI)) return true; - // These instructions are potentially expensive even if EXEC = 0. 
- if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) || - TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode())) - return true; - - ++NumInstr; - if (NumInstr >= SkipThreshold) + if (!CostModel.isProfitable(MI)) return true; } } return false; } +} // namespace // Returns true if the skip branch instruction is removed. bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB) { + + if (!TII->getSchedModel().hasInstrSchedModel()) + return false; + MachineBasicBlock *TrueMBB = nullptr; MachineBasicBlock *FalseMBB = nullptr; SmallVector<MachineOperand, 1> Cond; @@ -351,8 +392,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI, return false; // Consider only the forward branches. - if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || - mustRetainExeczBranch(*FalseMBB, *TrueMBB)) + if (SrcMBB.getNumber() >= TrueMBB->getNumber()) + return false; + + // Consider only when it is legal and profitable + if (mustRetainExeczBranch(MI, *FalseMBB, *TrueMBB)) return false; LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI); |
