diff options
Diffstat (limited to 'llvm/lib/CodeGen/RegAllocFast.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/RegAllocFast.cpp | 97 |
1 files changed, 59 insertions, 38 deletions
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 09ce8c42a385..b4660d4085ff 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -177,7 +177,7 @@ private: class RegAllocFastImpl { public: - RegAllocFastImpl(const RegClassFilterFunc F = allocateAllRegClasses, + RegAllocFastImpl(const RegClassFilterFunc F = nullptr, bool ClearVirtRegs_ = true) : ShouldAllocateClass(F), StackSlotForVirtReg(-1), ClearVirtRegs(ClearVirtRegs_) {} @@ -253,12 +253,23 @@ private: SmallVector<MachineInstr *, 32> Coalesced; - using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; - /// Set of register units that are used in the current instruction, and so + /// Track register units that are used in the current instruction, and so /// cannot be allocated. - RegUnitSet UsedInInstr; - RegUnitSet PhysRegUses; - SmallVector<uint16_t, 8> DefOperandIndexes; + /// + /// In the first phase (tied defs/early clobber), we consider also physical + /// uses, afterwards, we don't. If the lowest bit isn't set, it's a solely + /// physical use (markPhysRegUsedInInstr), otherwise, it's a normal use. To + /// avoid resetting the entire vector after every instruction, we track the + /// instruction "generation" in the remaining 31 bits -- this means, that if + /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is + /// never zero and always incremented by two. + /// + /// Don't allocate inline storage: the number of register units is typically + /// quite large (e.g., AArch64 > 100, X86 > 200, AMDGPU > 1000). + uint32_t InstrGen; + SmallVector<unsigned, 0> UsedInInstr; + + SmallVector<unsigned, 8> DefOperandIndexes; // Register masks attached to the current instruction. SmallVector<const uint32_t *> RegMasks; @@ -271,7 +282,7 @@ private: /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnit Unit : TRI->regunits(PhysReg)) - UsedInInstr.insert(Unit); + UsedInInstr[Unit] = InstrGen | 1; } // Check if physreg is clobbered by instruction's regmask(s). @@ -285,26 +296,25 @@ private: bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg)) return true; - for (MCRegUnit Unit : TRI->regunits(PhysReg)) { - if (UsedInInstr.count(Unit)) - return true; - if (LookAtPhysRegUses && PhysRegUses.count(Unit)) + for (MCRegUnit Unit : TRI->regunits(PhysReg)) + if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses)) return true; - } return false; } /// Mark physical register as being used in a register use operand. /// This is only used by the special livethrough handling code. void markPhysRegUsedInInstr(MCPhysReg PhysReg) { - for (MCRegUnit Unit : TRI->regunits(PhysReg)) - PhysRegUses.insert(Unit); + for (MCRegUnit Unit : TRI->regunits(PhysReg)) { + assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?"); + UsedInInstr[Unit] = InstrGen; + } } /// Remove mark of physical register being used in the instruction. void unmarkRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnit Unit : TRI->regunits(PhysReg)) - UsedInInstr.erase(Unit); + UsedInInstr[Unit] = 0; } enum : unsigned { @@ -322,7 +332,7 @@ public: private: void allocateBasicBlock(MachineBasicBlock &MBB); - void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts, + void addRegClassDefCounts(MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const; void findAndSortDefOperandIndexes(const MachineInstr &MI); @@ -387,8 +397,7 @@ class RegAllocFast : public MachineFunctionPass { public: static char ID; - RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses, - bool ClearVirtRegs_ = true) + RegAllocFast(const RegClassFilterFunc F = nullptr, bool ClearVirtRegs_ = true) : MachineFunctionPass(ID), Impl(F, ClearVirtRegs_) {} bool runOnMachineFunction(MachineFunction &MF) override { @@ -431,6 +440,8 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, bool RegAllocFastImpl::shouldAllocateRegister(const Register Reg) const { assert(Reg.isVirtual()); + if (!ShouldAllocateClass) + return true; const TargetRegisterClass &RC = *MRI->getRegClass(Reg); return ShouldAllocateClass(*TRI, RC); } @@ -1253,7 +1264,7 @@ void RegAllocFastImpl::dumpState() const { /// Count number of defs consumed from each register class by \p Reg void RegAllocFastImpl::addRegClassDefCounts( - std::vector<unsigned> &RegClassDefCounts, Register Reg) const { + MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const { assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); if (Reg.isVirtual()) { @@ -1289,10 +1300,6 @@ void RegAllocFastImpl::addRegClassDefCounts( void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { DefOperandIndexes.clear(); - // Track number of defs which may consume a register from the class. - std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0); - assert(RegClassDefCounts[0] == 0); - LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { const MachineOperand &MO = MI.getOperand(I); @@ -1306,15 +1313,27 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { } } - if (MO.isDef()) { - if (Reg.isVirtual() && shouldAllocateRegister(Reg)) - DefOperandIndexes.push_back(I); - - addRegClassDefCounts(RegClassDefCounts, Reg); - } + if (MO.isDef() && Reg.isVirtual() && shouldAllocateRegister(Reg)) + DefOperandIndexes.push_back(I); } - llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) { + // Most instructions only have one virtual def, so there's no point in + // computing the possible number of defs for every register class. + if (DefOperandIndexes.size() <= 1) + return; + + // Track number of defs which may consume a register from the class. This is + // used to assign registers for possibly-too-small classes first. Example: + // defs are eax, 3 * gr32_abcd, 2 * gr32 => we want to assign the gr32_abcd + // registers first so that the gr32 don't use the gr32_abcd registers before + // we assign these. + SmallVector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0); + + for (const MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.isDef()) + addRegClassDefCounts(RegClassDefCounts, MO.getReg()); + + llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) { const MachineOperand &MO0 = MI.getOperand(I0); const MachineOperand &MO1 = MI.getOperand(I1); Register Reg0 = MO0.getReg(); @@ -1373,7 +1392,12 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // - The "free def operands" step has to come last instead of first for tied // operands and early-clobbers. - UsedInInstr.clear(); + InstrGen += 2; + // In the event we ever get more than 2**31 instructions... + if (LLVM_UNLIKELY(InstrGen == 0)) { + UsedInInstr.assign(UsedInInstr.size(), 0); + InstrGen = 2; + } RegMasks.clear(); BundleVirtRegsMap.clear(); @@ -1434,12 +1458,10 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // heuristic to figure out a good operand order before doing // assignments. if (NeedToAssignLiveThroughs) { - PhysRegUses.clear(); - while (ReArrangedImplicitOps) { ReArrangedImplicitOps = false; findAndSortDefOperandIndexes(MI); - for (uint16_t OpIdx : DefOperandIndexes) { + for (unsigned OpIdx : DefOperandIndexes) { MachineOperand &MO = MI.getOperand(OpIdx); LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); Register Reg = MO.getReg(); @@ -1760,10 +1782,8 @@ bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) { MRI->freezeReservedRegs(); RegClassInfo.runOnMachineFunction(MF); unsigned NumRegUnits = TRI->getNumRegUnits(); - UsedInInstr.clear(); - UsedInInstr.setUniverse(NumRegUnits); - PhysRegUses.clear(); - PhysRegUses.setUniverse(NumRegUnits); + InstrGen = 0; + UsedInInstr.assign(NumRegUnits, 0); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers @@ -1790,6 +1810,7 @@ bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses RegAllocFastPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &) { + MFPropsModifier _(*this, MF); RegAllocFastImpl Impl(Opts.Filter, Opts.ClearVRegs); bool Changed = Impl.runOnMachineFunction(MF); if (!Changed) |
