summaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp59
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h39
4 files changed, 138 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 254b75b784e7..ae553da22e50 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1150,6 +1150,40 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
GCNSchedStage::finalizeGCNSchedStage();
}
+bool ILPInitialScheduleStage::initGCNSchedStage() { // Raises the load latency scale factors for this stage; returns false if base init fails.
+ if (!GCNSchedStage::initGCNSchedStage())
+ return false; // Base-stage init declined; the scale factors are left untouched.
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ OriginalLoadLatencyScaleFactor = TII->getLoadLatencyScaleFactor(); // Saved so finalizeGCNSchedStage() can restore it.
+ OriginalDSReadLatencyScaleFactor = TII->getDSReadLatencyScaleFactor();
+ OriginalVMEMLoadLatencyScaleFactor = TII->getVMEMLoadLatencyScaleFactor();
+ const unsigned ILPLoadLatencyScaleFactorDefault = 300; // Factors are divided by 100 when applied, so 300 triples the latency.
+ if (ILPLoadLatencyScaleFactorDefault > TII->getLoadLatencyScaleFactor()) // Only ever raise; a larger existing factor is kept.
+ TII->setLoadLatencyScaleFactor(ILPLoadLatencyScaleFactorDefault);
+ if (ILPLoadLatencyScaleFactorDefault > TII->getDSReadLatencyScaleFactor())
+ TII->setDSReadLatencyScaleFactor(ILPLoadLatencyScaleFactorDefault);
+ if (ILPLoadLatencyScaleFactorDefault > TII->getVMEMLoadLatencyScaleFactor())
+ TII->setVMEMLoadLatencyScaleFactor(ILPLoadLatencyScaleFactorDefault);
+
+ LLVM_DEBUG(dbgs() << "ILP Initial Schedule: Set load latency scale factor to "
+ << TII->getLoadLatencyScaleFactor() << '\n'); // Only the generic load factor is logged, not the DS/VMEM ones.
+ return true;
+}
+
+void ILPInitialScheduleStage::finalizeGCNSchedStage() { // Puts back the scale factors saved by initGCNSchedStage().
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ TII->setLoadLatencyScaleFactor(OriginalLoadLatencyScaleFactor); // Restore unconditionally, even if init did not raise this factor.
+ TII->setDSReadLatencyScaleFactor(OriginalDSReadLatencyScaleFactor);
+ TII->setVMEMLoadLatencyScaleFactor(OriginalVMEMLoadLatencyScaleFactor);
+
+ LLVM_DEBUG(
+ dbgs() << "ILP Initial Schedule: Restored load latency scale factor to "
+ << OriginalLoadLatencyScaleFactor << "\n"); // Only the generic load factor is logged.
+
+ GCNSchedStage::finalizeGCNSchedStage(); // Base-stage finalization runs after the factors are restored.
+}
+
bool GCNSchedStage::initGCNRegion() {
// Check whether this new region is also a new block.
if (DAG.RegionBegin->getParent() != CurrentMBB)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 790370ff8ab4..5be6d4bde6cd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -515,8 +515,15 @@ public:
};
class ILPInitialScheduleStage : public GCNSchedStage {
+private:
+ unsigned OriginalLoadLatencyScaleFactor = 0;
+ unsigned OriginalDSReadLatencyScaleFactor = 0;
+ unsigned OriginalVMEMLoadLatencyScaleFactor = 0;
+
public:
bool shouldRevertScheduling(unsigned WavesAfter) override;
+ bool initGCNSchedStage() override;
+ void finalizeGCNSchedStage() override;
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
: GCNSchedStage(StageID, DAG) {}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5106478a95b4..a35aabd405a1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -62,9 +62,29 @@ static cl::opt<bool> Fix16BitCopies(
cl::init(true),
cl::ReallyHidden);
+static cl::opt<unsigned> AMDGPULoadLatencyScaleFactor(
+ "amdgpu-load-latency-scale-factor",
+ cl::desc("Scale factor for load instruction latency. Final latency is "
+ "scaled by `Factor / 100 * Latency`."),
+ cl::init(100), cl::ReallyHidden); // Default 100 leaves the latency unchanged.
+
+static cl::opt<unsigned> AMDGPUDSReadLatencyScaleFactor(
+ "amdgpu-ds-read-latency-scale-factor",
+ cl::desc("Scale factor for LDS (DS) read instruction latency. Final "
+ "latency is scaled by `Factor / 100 * Latency`."),
+ cl::init(100), cl::ReallyHidden); // Default 100 leaves the latency unchanged.
+
+static cl::opt<unsigned> AMDGPUVMEMLoadLatencyScaleFactor(
+ "amdgpu-vmem-load-latency-scale-factor",
+ cl::desc("Scale factor for VMEM/BUFFER/FLAT load instruction latency. "
+ "Final latency is scaled by `Factor / 100 * Latency`."),
+ cl::init(100), cl::ReallyHidden); // Default 100 leaves the latency unchanged.
+
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(ST, AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- RI(ST), ST(ST) {
+ RI(ST), ST(ST), LoadLatencyScaleFactor(AMDGPULoadLatencyScaleFactor), // The three scale factors start from their cl::opt values.
+ DSReadLatencyScaleFactor(AMDGPUDSReadLatencyScaleFactor),
+ VMEMLoadLatencyScaleFactor(AMDGPUVMEMLoadLatencyScaleFactor) {
SchedModel.init(&ST);
}
@@ -10240,6 +10260,43 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return SchedModel.computeInstrLatency(&MI);
}
+std::optional<unsigned>
+SIInstrInfo::getInstrLatency(const TargetSchedModel &TargetSchedModel,
+ const MachineInstr &MI) const { // Returns the base-class latency, scaled for instructions that may load.
+ auto LatencyOpt = TargetInstrInfo::getInstrLatency(TargetSchedModel, MI); // Start from the base-class answer.
+ if (!LatencyOpt)
+ return std::nullopt; // No base latency available, so there is nothing to scale.
+ unsigned Latency = *LatencyOpt;
+ if (MI.mayLoad()) { // Instructions that cannot load keep the base latency.
+ unsigned Scale = LoadLatencyScaleFactor; // Generic factor unless a more specific class matches below.
+ if (isDS(MI))
+ Scale = DSReadLatencyScaleFactor;
+ else if (isVMEM(MI) || isFLAT(MI))
+ Scale = VMEMLoadLatencyScaleFactor;
+ Latency = (Latency * Scale) / 100; // Factor is per-hundred; integer division truncates.
+ }
+ return Latency;
+}
+
+std::optional<unsigned> SIInstrInfo::getOperandLatency(
+ const TargetSchedModel &SchedModel, const MachineInstr *DefMI,
+ unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const { // Base-class operand latency, scaled when DefMI may load.
+ auto LatOpt = TargetInstrInfo::getOperandLatency(
+ SchedModel, DefMI, DefOperIdx, UseMI, UseOperIdx); // Start from the base-class answer.
+ if (!LatOpt)
+ return std::nullopt; // No base latency available, so there is nothing to scale.
+ unsigned Latency = *LatOpt;
+ if (DefMI && DefMI->mayLoad()) { // Scaling depends only on the defining instruction; a null DefMI is left unscaled.
+ unsigned Scale = LoadLatencyScaleFactor; // Generic factor unless a more specific class matches below.
+ if (isDS(*DefMI))
+ Scale = DSReadLatencyScaleFactor;
+ else if (isVMEM(*DefMI) || isFLAT(*DefMI))
+ Scale = VMEMLoadLatencyScaleFactor;
+ Latency = (Latency * Scale) / 100; // Factor is per-hundred; integer division truncates.
+ }
+ return Latency;
+}
+
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dffb3d7459e6..e01e3030b100 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -93,6 +93,13 @@ private:
const GCNSubtarget &ST;
TargetSchedModel SchedModel;
mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
+ // Load latency from the machine model is scaled as
+ // `Latency * Factor / 100`, so a factor of 100 leaves it unchanged.
+ mutable unsigned LoadLatencyScaleFactor = 100; // mutable: written through the const setters.
+ // Separate scale factor for LDS (DS) read operations.
+ mutable unsigned DSReadLatencyScaleFactor = 100;
+ // Separate scale factor for VMEM/BUFFER/FLAT loads.
+ mutable unsigned VMEMLoadLatencyScaleFactor = 100;
// The inverse predicate should have the negative value.
enum BranchPredicate {
@@ -111,6 +118,38 @@ private:
static BranchPredicate getBranchPredicate(unsigned Opcode);
public:
+ void setLoadLatencyScaleFactor(unsigned Factor) const { // const-qualified, but writes the mutable member.
+ LoadLatencyScaleFactor = Factor;
+ }
+
+ unsigned getLoadLatencyScaleFactor() const { return LoadLatencyScaleFactor; } // Current generic load scale factor.
+
+ // Accessors for the DS read (LDS) latency scale factor, kept separate from the generic one.
+ void setDSReadLatencyScaleFactor(unsigned Factor) const { // const-qualified, but writes the mutable member.
+ DSReadLatencyScaleFactor = Factor;
+ }
+ unsigned getDSReadLatencyScaleFactor() const {
+ return DSReadLatencyScaleFactor;
+ }
+
+ // Accessors for the VMEM/BUFFER/FLAT load latency scale factor, kept separate from the generic one.
+ void setVMEMLoadLatencyScaleFactor(unsigned Factor) const { // const-qualified, but writes the mutable member.
+ VMEMLoadLatencyScaleFactor = Factor;
+ }
+ unsigned getVMEMLoadLatencyScaleFactor() const {
+ return VMEMLoadLatencyScaleFactor;
+ }
+
+ // TargetSchedModel latency hooks, overridden to apply the load latency scale factors.
+ std::optional<unsigned>
+ getInstrLatency(const TargetSchedModel &TargetSchedModel,
+ const MachineInstr &MI) const override;
+ std::optional<unsigned> getOperandLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr *DefMI,
+ unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
MachineRegisterInfo &MRI,
const MachineOperand &SuperReg,