diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 201 |
1 files changed, 102 insertions, 99 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 9a082921d4f7..e804f81c36db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -205,11 +205,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { return Parent->getEnclosingBlockWithPredecessors(); } -void VPBlockBase::deleteCFG(VPBlockBase *Entry) { - for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry))) - delete Block; -} - VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { iterator It = begin(); while (It != end() && It->isPhi()) @@ -221,9 +216,10 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan, - Type *CanonicalIVTy) + Loop *CurrentParentLoop, Type *CanonicalIVTy) : TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan), - LVer(nullptr), TypeAnalysis(CanonicalIVTy) {} + CurrentParentLoop(CurrentParentLoop), LVer(nullptr), + TypeAnalysis(CanonicalIVTy) {} Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) { if (Def->isLiveIn()) @@ -474,6 +470,13 @@ void VPIRBasicBlock::execute(VPTransformState *State) { connectToPredecessors(State->CFG); } +VPIRBasicBlock *VPIRBasicBlock::clone() { + auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB); + for (VPRecipeBase &R : Recipes) + NewBlock->appendRecipe(R.clone()); + return NewBlock; +} + void VPBasicBlock::execute(VPTransformState *State) { bool Replica = bool(State->Lane); BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. @@ -484,11 +487,9 @@ void VPBasicBlock::execute(VPTransformState *State) { }; // 1. Create an IR basic block. - if (this == getPlan()->getVectorPreheader() || - (Replica && this == getParent()->getEntry()) || + if ((Replica && this == getParent()->getEntry()) || IsReplicateRegion(getSingleHierarchicalPredecessor())) { // Reuse the previous basic block if the current VPBB is either - // * the vector preheader, // * the entry to a replicate region, or // * the exit of a replicate region. State->CFG.VPBB2IRBB[this] = NewBB; @@ -500,8 +501,8 @@ void VPBasicBlock::execute(VPTransformState *State) { UnreachableInst *Terminator = State->Builder.CreateUnreachable(); // Register NewBB in its loop. In innermost loops its the same for all // BB's. - if (State->CurrentVectorLoop) - State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); + if (State->CurrentParentLoop) + State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI); State->Builder.SetInsertPoint(Terminator); State->CFG.PrevBB = NewBB; @@ -513,14 +514,11 @@ void VPBasicBlock::execute(VPTransformState *State) { executeRecipes(State, NewBB); } -void VPBasicBlock::dropAllReferences(VPValue *NewValue) { - for (VPRecipeBase &R : Recipes) { - for (auto *Def : R.definedValues()) - Def->replaceAllUsesWith(NewValue); - - for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) - R.setOperand(I, NewValue); - } +VPBasicBlock *VPBasicBlock::clone() { + auto *NewBlock = getPlan()->createVPBasicBlock(getName()); + for (VPRecipeBase &R : *this) + NewBlock->appendRecipe(R.clone()); + return NewBlock; } void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { @@ -541,7 +539,7 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { SmallVector<VPBlockBase *, 2> Succs(successors()); // Create new empty block after the block to split. - auto *SplitBlock = new VPBasicBlock(getName() + ".split"); + auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split"); VPBlockUtils::insertBlockAfter(SplitBlock, this); // Finally, move the recipes starting at SplitAt to new block. @@ -557,7 +555,9 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) { if (P && P->isReplicator()) { P = P->getParent(); - assert(!cast<VPRegionBlock>(P)->isReplicator() && + // Multiple loop regions can be nested, but replicate regions can only be + // nested inside a loop region or must be outside any other region. + assert((!P || !cast<VPRegionBlock>(P)->isReplicator()) && "unexpected nested replicate regions"); } return P; @@ -701,37 +701,30 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { VPRegionBlock *VPRegionBlock::clone() { const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); - auto *NewRegion = - new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator()); + auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting, + getName(), isReplicator()); for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) Block->setParent(NewRegion); return NewRegion; } -void VPRegionBlock::dropAllReferences(VPValue *NewValue) { - for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) - // Drop all references in VPBasicBlocks and replace all uses with - // DummyValue. - Block->dropAllReferences(NewValue); -} - void VPRegionBlock::execute(VPTransformState *State) { ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(Entry); if (!isReplicator()) { // Create and register the new vector loop. - Loop *PrevLoop = State->CurrentVectorLoop; - State->CurrentVectorLoop = State->LI->AllocateLoop(); + Loop *PrevLoop = State->CurrentParentLoop; + State->CurrentParentLoop = State->LI->AllocateLoop(); BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; Loop *ParentLoop = State->LI->getLoopFor(VectorPH); // Insert the new loop into the loop nest and register the new basic blocks // before calling any utilities such as SCEV that require valid LoopInfo. if (ParentLoop) - ParentLoop->addChildLoop(State->CurrentVectorLoop); + ParentLoop->addChildLoop(State->CurrentParentLoop); else - State->LI->addTopLevelLoop(State->CurrentVectorLoop); + State->LI->addTopLevelLoop(State->CurrentParentLoop); // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { @@ -739,7 +732,7 @@ void VPRegionBlock::execute(VPTransformState *State) { Block->execute(State); } - State->CurrentVectorLoop = PrevLoop; + State->CurrentParentLoop = PrevLoop; return; } @@ -822,17 +815,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, #endif VPlan::VPlan(Loop *L) { - setEntry(VPIRBasicBlock::fromBasicBlock(L->getLoopPreheader())); - ScalarHeader = VPIRBasicBlock::fromBasicBlock(L->getHeader()); + setEntry(createVPIRBasicBlock(L->getLoopPreheader())); + ScalarHeader = createVPIRBasicBlock(L->getHeader()); } VPlan::~VPlan() { - if (Entry) { - VPValue DummyValue; - for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) - Block->dropAllReferences(&DummyValue); - - VPBlockBase::deleteCFG(Entry); + VPValue DummyValue; + + for (auto *VPB : CreatedBlocks) { + if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) { + // Replace all operands of recipes and all VPValues defined in VPBB with + // DummyValue so the block can be deleted. + for (VPRecipeBase &R : *VPBB) { + for (auto *Def : R.definedValues()) + Def->replaceAllUsesWith(&DummyValue); + + for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) + R.setOperand(I, &DummyValue); + } + } + delete VPB; } for (VPValue *VPV : VPLiveInsToFree) delete VPV; @@ -840,14 +842,6 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) { - auto *VPIRBB = new VPIRBasicBlock(IRBB); - for (Instruction &I : - make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) - VPIRBB->appendRecipe(new VPIRInstruction(I)); - return VPIRBB; -} - VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, @@ -861,7 +855,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // an epilogue vector loop, the original entry block here will be replaced by // a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after // generating code for the main vector loop. - VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + VPBasicBlock *VecPreheader = Plan->createVPBasicBlock("vector.ph"); VPBlockUtils::connectBlocks(Plan->getEntry(), VecPreheader); // Create SCEV and VPValue for the trip count. @@ -878,17 +872,17 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // Create VPRegionBlock, with empty header and latch blocks, to be filled // during processing later. - VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); - VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); + VPBasicBlock *HeaderVPBB = Plan->createVPBasicBlock("vector.body"); + VPBasicBlock *LatchVPBB = Plan->createVPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop", - false /*isReplicator*/); + auto *TopRegion = Plan->createVPRegionBlock( + HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/); VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBasicBlock *MiddleVPBB = Plan->createVPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); - VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); + VPBasicBlock *ScalarPH = Plan->createVPBasicBlock("scalar.ph"); VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); if (!RequiresScalarEpilogueCheck) { VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); @@ -904,7 +898,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // we unconditionally branch to the scalar preheader. Do nothing. // 3) Otherwise, construct a runtime check. BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); - auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); + auto *VPExitBlock = Plan->createVPIRBasicBlock(IRExitBlock); // The connection order corresponds to the operands of the conditional branch. VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); @@ -942,7 +936,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); // FIXME: Model VF * UF computation completely in VPlan. - assert(VFxUF.getNumUsers() && "VFxUF expected to always have users"); + assert((!getVectorLoopRegion() || VFxUF.getNumUsers()) && + "VFxUF expected to always have users"); unsigned UF = getUF(); if (VF.getNumUsers()) { Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF); @@ -955,22 +950,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, } } -/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p -/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must -/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All -/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock. -static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { - VPIRBasicBlock *IRVPBB = VPIRBasicBlock::fromBasicBlock(IRBB); - for (auto &R : make_early_inc_range(*VPBB)) { - assert(!R.isPhi() && "Tried to move phi recipe to end of block"); - R.moveBefore(*IRVPBB, IRVPBB->end()); - } - - VPBlockUtils::reassociateBlocks(VPBB, IRVPBB); - - delete VPBB; -} - /// Generate the code inside the preheader and body of the vectorized loop. /// Assumes a single pre-header basic-block was created for this. Introduce /// additional basic-blocks as needed, and fill them all. @@ -978,25 +957,13 @@ void VPlan::execute(VPTransformState *State) { // Initialize CFG state. State->CFG.PrevVPBB = nullptr; State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); - BasicBlock *VectorPreHeader = State->CFG.PrevBB; - State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. + BasicBlock *VectorPreHeader = State->CFG.PrevBB; cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); State->CFG.DTU.applyUpdates( {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); - // Replace regular VPBB's for the vector preheader, middle and scalar - // preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR - // blocks are created during skeleton creation, so we can only create the - // VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan - // construction. - BasicBlock *MiddleBB = State->CFG.ExitBB; - BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); - replaceVPBBWithIRVPBB(getVectorPreheader(), VectorPreHeader); - replaceVPBBWithIRVPBB(getMiddleBlock(), MiddleBB); - replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); - LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF << ", UF=" << getUF() << '\n'); setName("Final VPlan"); @@ -1005,6 +972,8 @@ void VPlan::execute(VPTransformState *State) { // Disconnect the middle block from its single successor (the scalar loop // header) in both the CFG and DT. The branch will be recreated during VPlan // execution. + BasicBlock *MiddleBB = State->CFG.ExitBB; + BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); auto *BrInst = new UnreachableInst(MiddleBB->getContext()); BrInst->insertBefore(MiddleBB->getTerminator()); MiddleBB->getTerminator()->eraseFromParent(); @@ -1022,12 +991,18 @@ void VPlan::execute(VPTransformState *State) { for (VPBlockBase *Block : RPOT) Block->execute(State); - VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); + State->CFG.DTU.flush(); + + auto *LoopRegion = getVectorLoopRegion(); + if (!LoopRegion) + return; + + VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock(); BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. - VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { // Skip phi-like recipes that generate their backedege values themselves. if (isa<VPWidenPHIRecipe>(&R)) @@ -1066,8 +1041,6 @@ void VPlan::execute(VPTransformState *State) { Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); } - - State->CFG.DTU.flush(); } InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { @@ -1080,14 +1053,14 @@ VPRegionBlock *VPlan::getVectorLoopRegion() { // TODO: Cache if possible. for (VPBlockBase *B : vp_depth_first_shallow(getEntry())) if (auto *R = dyn_cast<VPRegionBlock>(B)) - return R; + return R->isReplicator() ? nullptr : R; return nullptr; } const VPRegionBlock *VPlan::getVectorLoopRegion() const { for (const VPBlockBase *B : vp_depth_first_shallow(getEntry())) if (auto *R = dyn_cast<VPRegionBlock>(B)) - return R; + return R->isReplicator() ? nullptr : R; return nullptr; } @@ -1217,6 +1190,7 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, } VPlan *VPlan::duplicate() { + unsigned NumBlocksBeforeCloning = CreatedBlocks.size(); // Clone blocks. const auto &[NewEntry, __] = cloneFrom(Entry); @@ -1257,9 +1231,32 @@ VPlan *VPlan::duplicate() { assert(Old2NewVPValues.contains(TripCount) && "TripCount must have been added to Old2NewVPValues"); NewPlan->TripCount = Old2NewVPValues[TripCount]; + + // Transfer all cloned blocks (the second half of all current blocks) from + // current to new VPlan. + unsigned NumBlocksAfterCloning = CreatedBlocks.size(); + for (unsigned I : + seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning)) + NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]); + CreatedBlocks.truncate(NumBlocksBeforeCloning); + return NewPlan; } +VPIRBasicBlock *VPlan::createEmptyVPIRBasicBlock(BasicBlock *IRBB) { + auto *VPIRBB = new VPIRBasicBlock(IRBB); + CreatedBlocks.push_back(VPIRBB); + return VPIRBB; +} + +VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) { + auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB); + for (Instruction &I : + make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) + VPIRBB->appendRecipe(new VPIRInstruction(I)); + return VPIRBB; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) Twine VPlanPrinter::getUID(const VPBlockBase *Block) { @@ -1409,11 +1406,17 @@ void VPlanIngredient::print(raw_ostream &O) const { #endif -bool VPValue::isDefinedOutsideLoopRegions() const { - return !hasDefiningRecipe() || - !getDefiningRecipe()->getParent()->getEnclosingLoopRegion(); +/// Returns true if there is a vector loop region and \p VPV is defined in a +/// loop region. +static bool isDefinedInsideLoopRegions(const VPValue *VPV) { + const VPRecipeBase *DefR = VPV->getDefiningRecipe(); + return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || + DefR->getParent()->getEnclosingLoopRegion()); } +bool VPValue::isDefinedOutsideLoopRegions() const { + return !isDefinedInsideLoopRegions(this); +} void VPValue::replaceAllUsesWith(VPValue *New) { replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); } |
