summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp 86
1 files changed, 65 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 1334029544f9..701084844cd9 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -15,19 +15,13 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/BranchProbability.h"
using namespace llvm;
#define DEBUG_TYPE "si-pre-emit-peephole"
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
- "amdgpu-skip-threshold", cl::Hidden,
- cl::desc(
- "Number of instructions before jumping over divergent control flow"),
- cl::location(SkipThreshold), cl::init(12));
-
namespace {
class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ private:
MachineBasicBlock *&TrueMBB,
MachineBasicBlock *&FalseMBB,
SmallVectorImpl<MachineOperand> &Cond);
- bool mustRetainExeczBranch(const MachineBasicBlock &From,
+ bool mustRetainExeczBranch(const MachineInstr &Branch,
+ const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
@@ -304,11 +299,58 @@ bool SIPreEmitPeephole::getBlockDestinations(
return true;
}
+namespace {
+class BranchWeightCostModel {
+ const SIInstrInfo &TII;
+ const TargetSchedModel &SchedModel;
+ BranchProbability BranchProb;
+ static constexpr uint64_t BranchNotTakenCost = 1;
+ uint64_t BranchTakenCost;
+ uint64_t ThenCyclesCost = 0;
+
+public:
+ BranchWeightCostModel(const SIInstrInfo &TII, const MachineInstr &Branch,
+ const MachineBasicBlock &Succ)
+ : TII(TII), SchedModel(TII.getSchedModel()) {
+ const MachineBasicBlock &Head = *Branch.getParent();
+ const auto *FromIt = find(Head.successors(), &Succ);
+ assert(FromIt != Head.succ_end());
+
+ BranchProb = Head.getSuccProbability(FromIt);
+ if (BranchProb.isUnknown())
+ BranchProb = BranchProbability::getZero();
+ BranchTakenCost = SchedModel.computeInstrLatency(&Branch);
+ }
+
+ bool isProfitable(const MachineInstr &MI) {
+ if (TII.isWaitcnt(MI.getOpcode()))
+ return false;
+
+ ThenCyclesCost += SchedModel.computeInstrLatency(&MI);
+
+ // Consider `P = N/D` to be the probability of execz being false (i.e. of
+ // executing the then-block). The transformation is profitable if always
+ // executing the 'then' block is cheaper than sometimes executing 'then' and
+ // always executing s_cbranch_execz:
+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
+ // BranchNotTakenCost
+ uint64_t Numerator = BranchProb.getNumerator();
+ uint64_t Denominator = BranchProb.getDenominator();
+ return (Denominator - Numerator) * ThenCyclesCost <=
+ ((Denominator - Numerator) * BranchTakenCost +
+ Numerator * BranchNotTakenCost);
+ }
+};
+
bool SIPreEmitPeephole::mustRetainExeczBranch(
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
+ const MachineInstr &Branch, const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const {
+ assert(is_contained(Branch.getParent()->successors(), &From));
+ BranchWeightCostModel CostModel{*TII, Branch, From};
+ const MachineFunction *MF = From.getParent();
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
MBBI != End && MBBI != ToI; ++MBBI) {
const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +368,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
return true;
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
- TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
+ if (!CostModel.isProfitable(MI))
return true;
}
}
return false;
}
+} // namespace
// Returns true if the skip branch instruction is removed.
bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
+
+ if (!TII->getSchedModel().hasInstrSchedModel())
+ return false;
+
MachineBasicBlock *TrueMBB = nullptr;
MachineBasicBlock *FalseMBB = nullptr;
SmallVector<MachineOperand, 1> Cond;
@@ -351,8 +392,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return false;
// Consider only the forward branches.
- if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
- mustRetainExeczBranch(*FalseMBB, *TrueMBB))
+ if (SrcMBB.getNumber() >= TrueMBB->getNumber())
+ return false;
+
+ // Remove the branch only when doing so is both legal and profitable.
+ if (mustRetainExeczBranch(MI, *FalseMBB, *TrueMBB))
return false;
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);