diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 148 |
1 files changed, 125 insertions, 23 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index d6958d9055fa..57f517bfba0e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -58,11 +58,17 @@ static cl::opt<bool> "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false)); +static cl::opt<bool> GCNTrackers( + "amdgpu-use-amdgpu-trackers", cl::Hidden, + cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), + cl::init(false)); + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), - HasHighPressure(false) {} + DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) { +} void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) { GenericScheduler::initialize(DAG); @@ -148,17 +154,38 @@ static bool canUsePressureDiffs(const SUnit &SU) { return true; } -static void getRegisterPressures(bool AtTop, - const RegPressureTracker &RPTracker, SUnit *SU, - std::vector<unsigned> &Pressure, - std::vector<unsigned> &MaxPressure) { +static void getRegisterPressures( + bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, + std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure, + GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, + ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) { // getDownwardPressure() and getUpwardPressure() make temporary changes to // the tracker, so we need to pass those function a non-const copy. RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); - if (AtTop) - TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); - else - TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + if (!GCNTrackers) { + AtTop + ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure) + : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + + return; + } + + // GCNTrackers + Pressure.resize(4, 0); + MachineInstr *MI = SU->getInstr(); + GCNRegPressure NewPressure; + if (AtTop) { + GCNDownwardRPTracker TempDownwardTracker(DownwardTracker); + NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI); + } else { + GCNUpwardRPTracker TempUpwardTracker(UpwardTracker); + TempUpwardTracker.recede(*MI); + NewPressure = TempUpwardTracker.getPressure(); + } + Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum(); + Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = + NewPressure.getArchVGPRNum(); + Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum(); } void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, @@ -187,8 +214,9 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, // // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of // PressureDiffs. - if (AtTop || !canUsePressureDiffs(*SU)) { - getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure); + if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) { + getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure, + DownwardTracker, UpwardTracker, DAG, SRI); } else { // Reserve 4 slots. Pressure.resize(4, 0); @@ -206,7 +234,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, #ifdef EXPENSIVE_CHECKS std::vector<unsigned> CheckPressure, CheckMaxPressure; - getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure); + getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure, + DownwardTracker, UpwardTracker, DAG, SRI); if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] != CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] || Pressure[AMDGPU::RegisterPressureSets::VGPR_32] != @@ -294,8 +323,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, unsigned SGPRPressure = 0; unsigned VGPRPressure = 0; if (DAG->isTrackingPressure()) { - SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; - VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; + if (!GCNTrackers) { + SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; + VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; + } else { + GCNRPTracker *T = IsBottomUp + ? static_cast<GCNRPTracker *>(&UpwardTracker) + : static_cast<GCNRPTracker *>(&DownwardTracker); + SGPRPressure = T->getPressure().getSGPRNum(); + VGPRPressure = T->getPressure().getArchVGPRNum(); + } } ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { @@ -444,6 +481,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { return SU; } +void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + if (GCNTrackers) { + MachineInstr *MI = SU->getInstr(); + IsTopNode ? (void)DownwardTracker.advance(MI, false) + : UpwardTracker.recede(*MI); + } + + return GenericScheduler::schedNode(SU, IsTopNode); +} + GCNSchedStageID GCNSchedStrategy::getCurrentStage() { assert(CurrentStage && CurrentStage != SchedStages.end()); return *CurrentStage; @@ -470,12 +517,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const { } GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( - const MachineSchedContext *C) + const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); + GCNTrackers = GCNTrackers & !IsLegacyScheduler; } GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C) @@ -571,7 +619,8 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive( MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S) : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()), MFI(*MF.getInfo<SIMachineFunctionInfo>()), - StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) { + StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy), + RegionLiveOuts(this, /*IsLiveOut=*/true) { LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); if (RelaxedOcc) { @@ -613,6 +662,14 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const { return RPTracker.moveMaxPressure(); } +static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, + MachineBasicBlock::iterator RegionEnd) { + auto REnd = RegionEnd == RegionBegin->getParent()->end() + ? std::prev(RegionEnd) + : RegionEnd; + return &*skipDebugInstructionsBackward(REnd, RegionBegin); +} + void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB) { GCNDownwardRPTracker RPTracker(*LIS); @@ -687,20 +744,45 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, } DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> -GCNScheduleDAGMILive::getBBLiveInMap() const { +GCNScheduleDAGMILive::getRegionLiveInMap() const { assert(!Regions.empty()); - std::vector<MachineInstr *> BBStarters; - BBStarters.reserve(Regions.size()); + std::vector<MachineInstr *> RegionFirstMIs; + RegionFirstMIs.reserve(Regions.size()); auto I = Regions.rbegin(), E = Regions.rend(); auto *BB = I->first->getParent(); do { auto *MI = &*skipDebugInstructionsForward(I->first, I->second); - BBStarters.push_back(MI); + RegionFirstMIs.push_back(MI); do { ++I; } while (I != E && I->first->getParent() == BB); } while (I != E); - return getLiveRegMap(BBStarters, false /*After*/, *LIS); + return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS); +} + +DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> +GCNScheduleDAGMILive::getRegionLiveOutMap() const { + assert(!Regions.empty()); + std::vector<MachineInstr *> RegionLastMIs; + RegionLastMIs.reserve(Regions.size()); + for (auto &[RegionBegin, RegionEnd] : reverse(Regions)) + RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd)); + + return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS); +} + +void RegionPressureMap::buildLiveRegMap() { + IdxToInstruction.clear(); + + RegionLiveRegMap = + IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap(); + for (unsigned I = 0; I < DAG->Regions.size(); I++) { + MachineInstr *RegionKey = + IsLiveOut + ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second) + : &*DAG->Regions[I].first; + IdxToInstruction[I] = RegionKey; + } } void GCNScheduleDAGMILive::finalizeSchedule() { @@ -726,8 +808,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() { void GCNScheduleDAGMILive::runSchedStages() { LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); - if (!Regions.empty()) - BBLiveInMap = getBBLiveInMap(); + if (!Regions.empty()) { + BBLiveInMap = getRegionLiveInMap(); + if (GCNTrackers) + RegionLiveOuts.buildLiveRegMap(); + } GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl); while (S.advanceStage()) { @@ -745,6 +830,19 @@ void GCNScheduleDAGMILive::runSchedStages() { continue; } + if (GCNTrackers) { + GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker(); + GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker(); + GCNRPTracker::LiveRegSet *RegionLiveIns = + &LiveIns[Stage->getRegionIdx()]; + + reinterpret_cast<GCNRPTracker *>(DownwardTracker) + ->reset(MRI, *RegionLiveIns); + reinterpret_cast<GCNRPTracker *>(UpwardTracker) + ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx( + Stage->getRegionIdx())); + } + ScheduleDAGMILive::schedule(); Stage->finalizeGCNRegion(); } @@ -1015,6 +1113,7 @@ void GCNSchedStage::finalizeGCNRegion() { void GCNSchedStage::checkScheduling() { // Check the results of scheduling. PressureAfter = DAG.getRealRegPressure(RegionIdx); + LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter)); LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n"); @@ -1586,6 +1685,9 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST, DAG.Regions = NewRegions; DAG.RescheduleRegions = NewRescheduleRegions; + if (GCNTrackers) + DAG.RegionLiveOuts.buildLiveRegMap(); + SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); MFI.increaseOccupancy(MF, ++DAG.MinOccupancy); |
