summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp201
1 files changed, 102 insertions, 99 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9a082921d4f7..e804f81c36db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -205,11 +205,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
- for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry)))
- delete Block;
-}
-
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -221,9 +216,10 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan,
- Type *CanonicalIVTy)
+ Loop *CurrentParentLoop, Type *CanonicalIVTy)
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
- LVer(nullptr), TypeAnalysis(CanonicalIVTy) {}
+ CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
+ TypeAnalysis(CanonicalIVTy) {}
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
@@ -474,6 +470,13 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
connectToPredecessors(State->CFG);
}
+VPIRBasicBlock *VPIRBasicBlock::clone() { // Returns a new VPIRBasicBlock wrapping the same IRBB.
+ auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB); // Starts with no recipes.
+ for (VPRecipeBase &R : Recipes) // Append a clone of every recipe, in order.
+ NewBlock->appendRecipe(R.clone());
+ return NewBlock;
+}
+
void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = bool(State->Lane);
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
@@ -484,11 +487,9 @@ void VPBasicBlock::execute(VPTransformState *State) {
};
// 1. Create an IR basic block.
- if (this == getPlan()->getVectorPreheader() ||
- (Replica && this == getParent()->getEntry()) ||
+ if ((Replica && this == getParent()->getEntry()) ||
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
// Reuse the previous basic block if the current VPBB is either
- // * the vector preheader,
// * the entry to a replicate region, or
// * the exit of a replicate region.
State->CFG.VPBB2IRBB[this] = NewBB;
@@ -500,8 +501,8 @@ void VPBasicBlock::execute(VPTransformState *State) {
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
// Register NewBB in its loop. In innermost loops it's the same for all
// BB's.
- if (State->CurrentVectorLoop)
- State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
+ if (State->CurrentParentLoop)
+ State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
State->CFG.PrevBB = NewBB;
@@ -513,14 +514,11 @@ void VPBasicBlock::execute(VPTransformState *State) {
executeRecipes(State, NewBB);
}
-void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
- for (VPRecipeBase &R : Recipes) {
- for (auto *Def : R.definedValues())
- Def->replaceAllUsesWith(NewValue);
-
- for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
- R.setOperand(I, NewValue);
- }
+VPBasicBlock *VPBasicBlock::clone() { // Returns a new block holding clones of this block's recipes.
+ auto *NewBlock = getPlan()->createVPBasicBlock(getName()); // Created via this block's plan; reuses this block's name.
+ for (VPRecipeBase &R : *this) // Append a clone of every recipe, in order.
+ NewBlock->appendRecipe(R.clone());
+ return NewBlock;
}
void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) {
@@ -541,7 +539,7 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
SmallVector<VPBlockBase *, 2> Succs(successors());
// Create new empty block after the block to split.
- auto *SplitBlock = new VPBasicBlock(getName() + ".split");
+ auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
VPBlockUtils::insertBlockAfter(SplitBlock, this);
// Finally, move the recipes starting at SplitAt to new block.
@@ -557,7 +555,9 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
if (P && P->isReplicator()) {
P = P->getParent();
- assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+ // Multiple loop regions can be nested, but replicate regions can only be
+ // nested inside a loop region or must be outside any other region.
+ assert((!P || !cast<VPRegionBlock>(P)->isReplicator()) &&
"unexpected nested replicate regions");
}
return P;
@@ -701,37 +701,30 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion =
- new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
+ auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
+ getName(), isReplicator());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
return NewRegion;
}
-void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
- // Drop all references in VPBasicBlocks and replace all uses with
- // DummyValue.
- Block->dropAllReferences(NewValue);
-}
-
void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
RPOT(Entry);
if (!isReplicator()) {
// Create and register the new vector loop.
- Loop *PrevLoop = State->CurrentVectorLoop;
- State->CurrentVectorLoop = State->LI->AllocateLoop();
+ Loop *PrevLoop = State->CurrentParentLoop;
+ State->CurrentParentLoop = State->LI->AllocateLoop();
BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop)
- ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ ParentLoop->addChildLoop(State->CurrentParentLoop);
else
- State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+ State->LI->addTopLevelLoop(State->CurrentParentLoop);
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
@@ -739,7 +732,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
Block->execute(State);
}
- State->CurrentVectorLoop = PrevLoop;
+ State->CurrentParentLoop = PrevLoop;
return;
}
@@ -822,17 +815,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::VPlan(Loop *L) {
- setEntry(VPIRBasicBlock::fromBasicBlock(L->getLoopPreheader()));
- ScalarHeader = VPIRBasicBlock::fromBasicBlock(L->getHeader());
+ setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
+ ScalarHeader = createVPIRBasicBlock(L->getHeader());
}
VPlan::~VPlan() {
- if (Entry) {
- VPValue DummyValue;
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
- Block->dropAllReferences(&DummyValue);
-
- VPBlockBase::deleteCFG(Entry);
+ VPValue DummyValue;
+
+ for (auto *VPB : CreatedBlocks) {
+ if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+ // Replace all operands of recipes and all VPValues defined in VPBB with
+ // DummyValue so the block can be deleted.
+ for (VPRecipeBase &R : *VPBB) {
+ for (auto *Def : R.definedValues())
+ Def->replaceAllUsesWith(&DummyValue);
+
+ for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
+ R.setOperand(I, &DummyValue);
+ }
+ }
+ delete VPB;
}
for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
@@ -840,14 +842,6 @@ VPlan::~VPlan() {
delete BackedgeTakenCount;
}
-VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {
- auto *VPIRBB = new VPIRBasicBlock(IRBB);
- for (Instruction &I :
- make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
- VPIRBB->appendRecipe(new VPIRInstruction(I));
- return VPIRBB;
-}
-
VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
@@ -861,7 +855,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// an epilogue vector loop, the original entry block here will be replaced by
// a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after
// generating code for the main vector loop.
- VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+ VPBasicBlock *VecPreheader = Plan->createVPBasicBlock("vector.ph");
VPBlockUtils::connectBlocks(Plan->getEntry(), VecPreheader);
// Create SCEV and VPValue for the trip count.
@@ -878,17 +872,17 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// Create VPRegionBlock, with empty header and latch blocks, to be filled
// during processing later.
- VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
- VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
+ VPBasicBlock *HeaderVPBB = Plan->createVPBasicBlock("vector.body");
+ VPBasicBlock *LatchVPBB = Plan->createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
- auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop",
- false /*isReplicator*/);
+ auto *TopRegion = Plan->createVPRegionBlock(
+ HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
- VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
+ VPBasicBlock *MiddleVPBB = Plan->createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
- VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBasicBlock *ScalarPH = Plan->createVPBasicBlock("scalar.ph");
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -904,7 +898,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// we unconditionally branch to the scalar preheader. Do nothing.
// 3) Otherwise, construct a runtime check.
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
- auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock);
+ auto *VPExitBlock = Plan->createVPIRBasicBlock(IRExitBlock);
// The connection order corresponds to the operands of the conditional branch.
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -942,7 +936,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
- assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
+ assert((!getVectorLoopRegion() || VFxUF.getNumUsers()) &&
+ "VFxUF expected to always have users");
unsigned UF = getUF();
if (VF.getNumUsers()) {
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
@@ -955,22 +950,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
}
}
-/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
-/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
-/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
-/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
-static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
- VPIRBasicBlock *IRVPBB = VPIRBasicBlock::fromBasicBlock(IRBB);
- for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
- R.moveBefore(*IRVPBB, IRVPBB->end());
- }
-
- VPBlockUtils::reassociateBlocks(VPBB, IRVPBB);
-
- delete VPBB;
-}
-
/// Generate the code inside the preheader and body of the vectorized loop.
/// Assumes a single pre-header basic-block was created for this. Introduce
/// additional basic-blocks as needed, and fill them all.
@@ -978,25 +957,13 @@ void VPlan::execute(VPTransformState *State) {
// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
- BasicBlock *VectorPreHeader = State->CFG.PrevBB;
- State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
+ BasicBlock *VectorPreHeader = State->CFG.PrevBB;
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
- // Replace regular VPBB's for the vector preheader, middle and scalar
- // preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR
- // blocks are created during skeleton creation, so we can only create the
- // VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan
- // construction.
- BasicBlock *MiddleBB = State->CFG.ExitBB;
- BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
- replaceVPBBWithIRVPBB(getVectorPreheader(), VectorPreHeader);
- replaceVPBBWithIRVPBB(getMiddleBlock(), MiddleBB);
- replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh);
-
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
<< ", UF=" << getUF() << '\n');
setName("Final VPlan");
@@ -1005,6 +972,8 @@ void VPlan::execute(VPTransformState *State) {
// Disconnect the middle block from its single successor (the scalar loop
// header) in both the CFG and DT. The branch will be recreated during VPlan
// execution.
+ BasicBlock *MiddleBB = State->CFG.ExitBB;
+ BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
auto *BrInst = new UnreachableInst(MiddleBB->getContext());
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
@@ -1022,12 +991,18 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : RPOT)
Block->execute(State);
- VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+ State->CFG.DTU.flush();
+
+ auto *LoopRegion = getVectorLoopRegion();
+ if (!LoopRegion)
+ return;
+
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedge values themselves.
if (isa<VPWidenPHIRecipe>(&R))
@@ -1066,8 +1041,6 @@ void VPlan::execute(VPTransformState *State) {
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
-
- State->CFG.DTU.flush();
}
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1080,14 +1053,14 @@ VPRegionBlock *VPlan::getVectorLoopRegion() {
// TODO: Cache if possible.
for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
if (auto *R = dyn_cast<VPRegionBlock>(B))
- return R;
+ return R->isReplicator() ? nullptr : R;
return nullptr;
}
const VPRegionBlock *VPlan::getVectorLoopRegion() const {
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
if (auto *R = dyn_cast<VPRegionBlock>(B))
- return R;
+ return R->isReplicator() ? nullptr : R;
return nullptr;
}
@@ -1217,6 +1190,7 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
}
VPlan *VPlan::duplicate() {
+ unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
// Clone blocks.
const auto &[NewEntry, __] = cloneFrom(Entry);
@@ -1257,9 +1231,32 @@ VPlan *VPlan::duplicate() {
assert(Old2NewVPValues.contains(TripCount) &&
"TripCount must have been added to Old2NewVPValues");
NewPlan->TripCount = Old2NewVPValues[TripCount];
+
+ // Transfer all cloned blocks (the second half of all current blocks) from
+ // current to new VPlan.
+ unsigned NumBlocksAfterCloning = CreatedBlocks.size();
+ for (unsigned I :
+ seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
+ NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
+ CreatedBlocks.truncate(NumBlocksBeforeCloning);
+
return NewPlan;
}
+VPIRBasicBlock *VPlan::createEmptyVPIRBasicBlock(BasicBlock *IRBB) { // Creates a VPIRBasicBlock for IRBB without adding any recipes.
+ auto *VPIRBB = new VPIRBasicBlock(IRBB);
+ CreatedBlocks.push_back(VPIRBB); // Record the block in this plan's CreatedBlocks list.
+ return VPIRBB;
+}
+
+VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) { // Creates a VPIRBasicBlock for IRBB and fills it from IRBB's instructions.
+ auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB); // Also records the block in CreatedBlocks.
+ for (Instruction &I : // Range ends at the terminator's iterator, so the terminator is not wrapped.
+ make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
+ VPIRBB->appendRecipe(new VPIRInstruction(I)); // One VPIRInstruction per visited instruction.
+ return VPIRBB;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1409,11 +1406,17 @@ void VPlanIngredient::print(raw_ostream &O) const {
#endif
-bool VPValue::isDefinedOutsideLoopRegions() const {
- return !hasDefiningRecipe() ||
- !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
+/// Returns true if \p VPV is defined by a recipe and either the plan has no
+/// vector loop region or the recipe's block has an enclosing loop region.
+static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+ const VPRecipeBase *DefR = VPV->getDefiningRecipe(); // If null, the result below is false.
+ return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
+ DefR->getParent()->getEnclosingLoopRegion());
+}
+bool VPValue::isDefinedOutsideLoopRegions() const {
+ return !isDefinedInsideLoopRegions(this); // Exact negation of the static helper's result.
+}
void VPValue::replaceAllUsesWith(VPValue *New) {
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
}