summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp301
1 files changed, 211 insertions, 90 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 86e50a7f9143..2145bb8c9ca8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
return false;
}
+#ifndef NDEBUG
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost C = CM.expectedCost(VF);
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
-#ifndef NDEBUG
unsigned AssumedMinimumVscale =
getVScaleForTuning(OrigLoop, TTI).value_or(1);
unsigned Width =
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
<< AssumedMinimumVscale << ")");
LLVM_DEBUG(dbgs() << ".\n");
-#endif
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
LLVM_DEBUG(
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
return ChosenFactor;
}
+#endif
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
@@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
-std::optional<VectorizationFactor>
-LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
CM.collectValuesToIgnore();
CM.collectElementTypesForWidening();
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
if (!MaxFactors) // Cases that should not be vectorized nor interleaved.
- return std::nullopt;
+ return;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (CM.selectUserVectorizationFactor(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
buildVPlansWithVPRecipes(UserVF, UserVF);
- if (!hasPlanWithVF(UserVF)) {
- LLVM_DEBUG(dbgs()
- << "LV: No VPlan could be built for " << UserVF << ".\n");
- return std::nullopt;
- }
-
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0, 0}};
+ return;
} else
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
@@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
LLVM_DEBUG(printPlans(dbgs()));
- if (VPlans.empty())
- return std::nullopt;
- if (all_of(VPlans,
- [](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); }))
- return VectorizationFactor::Disabled();
-
- // Select the optimal vectorization factor according to the legacy cost-model.
- // This is now only used to verify the decisions by the new VPlan-based
- // cost-model and will be retired once the VPlan-based cost-model is
- // stabilized.
- VectorizationFactor VF = selectVectorizationFactor();
- assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
- if (!hasPlanWithVF(VF.Width)) {
- LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
- << ".\n");
- return std::nullopt;
- }
- return VF;
}
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
@@ -7255,11 +7230,13 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
return Cost;
}
-ElementCount LoopVectorizationPlanner::computeBestVF() {
+VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
+ if (VPlans.empty())
+ return VectorizationFactor::Disabled();
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
- return *FirstPlan.vectorFactors().begin();
+ return {*FirstPlan.vectorFactors().begin(), 0, 0};
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
@@ -7267,6 +7244,7 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
// TODO: Compute scalar cost using VPlan-based cost model.
InstructionCost ScalarCost = CM.expectedCost(ScalarVF);
+ LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost);
VectorizationFactor BestFactor = ScalarFactor;
@@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
ProfitableVFs.push_back(CurrentFactor);
}
}
- return BestFactor.Width;
+
+#ifndef NDEBUG
+ // Select the optimal vectorization factor according to the legacy cost-model.
+ // This is now only used to verify the decisions by the new VPlan-based
+ // cost-model and will be retired once the VPlan-based cost-model is
+ // stabilized.
+ VectorizationFactor LegacyVF = selectVectorizationFactor();
+ assert(BestFactor.Width == LegacyVF.Width &&
+ " VPlan cost model and legacy cost model disagreed");
+ assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
+ "when vectorizing, the scalar cost must be computed.");
+#endif
+
+ return BestFactor;
}
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -8158,8 +8149,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
// builder. At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- // TODO: At the moment the first mask is always skipped, but it would be
- // better to skip the most expensive mask.
SmallVector<VPValue *, 2> OperandsWithMask;
for (unsigned In = 0; In < NumIncoming; In++) {
@@ -8172,8 +8161,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
"Distinct incoming values with one having a full mask");
break;
}
- if (In == 0)
- continue;
OperandsWithMask.push_back(EdgeMask);
}
return new VPBlendRecipe(Phi, OperandsWithMask);
@@ -8527,9 +8514,11 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
}
-// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
-// original exit block.
-static void addUsersInExitBlock(
+// Collect (ExitPhi, ExitingValue) pairs for phis in the original exit block
+// that are modeled in VPlan. Some exiting values are not modeled explicitly yet
+// and won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe,
+// VPWidenPointerInductionRecipe and induction increments.
+static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
auto MiddleVPBB =
@@ -8538,9 +8527,8 @@ static void addUsersInExitBlock(
// and there is nothing to fix from vector loop; phis should have incoming
// from scalar loop only.
if (MiddleVPBB->getNumSuccessors() != 2)
- return;
-
- // Introduce VPUsers modeling the exit values.
+ return {};
+ MapVector<PHINode *, VPValue *> ExitingValuesToFix;
BasicBlock *ExitBB =
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
@@ -8561,15 +8549,52 @@ static void addUsersInExitBlock(
return P && Inductions.contains(P);
})))
continue;
- Plan.addLiveOut(&ExitPhi, V);
+ ExitingValuesToFix.insert({&ExitPhi, V});
}
+ return ExitingValuesToFix;
}
-/// Feed a resume value for every FOR from the vector loop to the scalar loop,
-/// if middle block branches to scalar preheader, by introducing ExtractFromEnd
-/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the
-/// latter and corresponds to the scalar header.
-static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
+// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry
+// in \p ExitingValuesToFix.
+static void
+addUsersInExitBlock(VPlan &Plan,
+ MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
+ if (ExitingValuesToFix.empty())
+ return;
+
+ auto MiddleVPBB =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
+ BasicBlock *ExitBB =
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
+ // TODO: set B to MiddleVPBB->getFirstNonPhi(), taking care of affected tests.
+ VPBuilder B(MiddleVPBB);
+ if (auto *Terminator = MiddleVPBB->getTerminator()) {
+ auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
+ assert((!Condition || Condition->getParent() == MiddleVPBB) &&
+ "Condition expected in MiddleVPBB");
+ B.setInsertPoint(Condition ? Condition : Terminator);
+ }
+
+ // Introduce VPUsers modeling the exit values.
+ for (const auto &[ExitPhi, V] : ExitingValuesToFix) {
+ VPValue *Ext = B.createNaryOp(
+ VPInstruction::ExtractFromEnd,
+ {V, Plan.getOrAddLiveIn(ConstantInt::get(
+ IntegerType::get(ExitBB->getContext(), 32), 1))});
+ Plan.addLiveOut(ExitPhi, Ext);
+ }
+}
+
+/// Handle live-outs for first order recurrences, both in the scalar preheader
+/// and the original exit block:
+/// 1. Feed a resume value for every FOR from the vector loop to the scalar
+/// loop, if middle block branches to scalar preheader, by introducing
+/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a
+/// VPLiveOut which uses the latter and corresponds to the scalar header.
+/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in
+/// the original exit block using a VPLiveOut.
+static void addLiveOutsForFirstOrderRecurrences(
+ VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
// Start by finding out if middle block branches to scalar preheader, which is
@@ -8578,21 +8603,31 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
// TODO: Should be replaced by
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
// scalar region is modeled as well.
- VPBasicBlock *ScalarPHVPBB = nullptr;
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
- for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
- if (isa<VPIRBasicBlock>(Succ))
- continue;
- assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?");
- ScalarPHVPBB = cast<VPBasicBlock>(Succ);
+ BasicBlock *ExitBB = nullptr;
+ VPBasicBlock *ScalarPHVPBB = nullptr;
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar preheader.
+ ExitBB =
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ } else if (ExitingValuesToFix.empty()) {
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
+ } else {
+ ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSingleSuccessor())
+ ->getIRBasicBlock();
}
- if (!ScalarPHVPBB)
+ if (!ScalarPHVPBB) {
+ assert(ExitingValuesToFix.empty() &&
+ "missed inserting extracts for exiting values");
return;
+ }
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB);
// Reset insert point so new recipes are inserted before terminator and
// condition, if there is either the former or both.
+ // TODO: set MiddleBuilder to MiddleVPBB->getFirstNonPhi().
if (auto *Terminator = MiddleVPBB->getTerminator()) {
auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
assert((!Condition || Condition->getParent() == MiddleVPBB) &&
@@ -8601,12 +8636,81 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
}
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ VPValue *TwoVPV = Plan.getOrAddLiveIn(
+ ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
if (!FOR)
continue;
+ // This is the second phase of vectorizing first-order recurrences, creating
+ // extract for users outside the loop. An overview of the transformation is
+ // described below. Suppose we have the following loop with some use after
+ // the loop of the last a[i-1],
+ //
+ // for (int i = 0; i < n; ++i) {
+ // t = a[i - 1];
+ // b[i] = a[i] - t;
+ // }
+ // use t;
+ //
+ // There is a first-order recurrence on "a". For this loop, the shorthand
+ // scalar IR looks like:
+ //
+ // scalar.ph:
+ // s.init = a[-1]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [s.init, scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, exit.block
+ //
+ // exit.block:
+ // use = lcssa.phi [s1, scalar.body]
+ //
+ // In this example, s1 is a recurrence because its value depends on the
+ // previous iteration. In the first phase of vectorization, we created a
+ // VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
+ // for users in the scalar preheader and exit block.
+ //
+ // vector.ph:
+ // v_init = vector(..., ..., ..., a[-1])
+ // br vector.body
+ //
+ // vector.body
+ // i = phi [0, vector.ph], [i+4, vector.body]
+ // v1 = phi [v_init, vector.ph], [v2, vector.body]
+ // v2 = a[i, i+1, i+2, i+3]
+ // b[i] = v2 - v1
+ // // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
+ // b[i, i+1, i+2, i+3] = v2 - v1
+ // br cond, vector.body, middle.block
+ //
+ // middle.block:
+ // vector.recur.extract.for.phi = v2(2)
+ // vector.recur.extract = v2(3)
+ // br cond, scalar.ph, exit.block
+ //
+ // scalar.ph:
+ // scalar.recur.init = phi [vector.recur.extract, middle.block],
+ // [s.init, otherwise]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, exit.block
+ //
+ // exit.block:
+ // lo = lcssa.phi [s1, scalar.body],
+ // [vector.recur.extract.for.phi, middle.block]
+ //
// Extract the resume value and create a new VPLiveOut for it.
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(), OneVPV},
@@ -8614,7 +8718,28 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
- Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe);
+ auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
+ Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+
+ // Now create VPLiveOuts for users in the exit block.
+ // Extract the penultimate value of the recurrence and add VPLiveOut
+ // users of the recurrence splice.
+
+ // No edge from the middle block to the unique exit block has been inserted
+ // and there is nothing to fix from vector loop; phis should have incoming
+ // from scalar loop only.
+ if (ExitingValuesToFix.empty())
+ continue;
+ for (User *U : FORPhi->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (UI->getParent() != ExitBB)
+ continue;
+ VPValue *Ext = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
+ "vector.recur.extract.for.phi");
+ Plan.addLiveOut(cast<PHINode>(UI), Ext);
+ ExitingValuesToFix.erase(cast<PHINode>(UI));
+ }
}
}
@@ -8769,16 +8894,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// After here, VPBB should not be used.
VPBB = nullptr;
- addUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan,
- Legal->getInductionVars());
-
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
- addLiveOutsForFirstOrderRecurrences(*Plan);
+ MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock(
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
+
+ addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix);
+ addUsersInExitBlock(*Plan, ExitingValuesToFix);
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8931,6 +9057,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// iteration. The final value is selected by the final ComputeReductionResult.
void LoopVectorizationPlanner::adjustRecipesForReductions(
VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) {
+ using namespace VPlanPatternMatch;
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
// Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
@@ -8988,10 +9115,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (unsigned I = 0; I != Worklist.size(); ++I) {
VPSingleDefRecipe *Cur = Worklist[I];
for (VPUser *U : Cur->users()) {
- auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U);
- if (!UserRecipe) {
- assert(isa<VPLiveOut>(U) &&
- "U must either be a VPSingleDef or VPLiveOut");
+ auto *UserRecipe = cast<VPSingleDefRecipe>(U);
+ if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
+ assert(match(U, m_Binary<VPInstruction::ExtractFromEnd>(
+ m_VPValue(), m_VPValue())) &&
+ "U must be an ExtractFromEnd VPInstruction");
continue;
}
Worklist.insert(UserRecipe);
@@ -9208,9 +9336,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *FinalReductionResult = new VPInstruction(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
- OrigExitingVPV->replaceUsesWithIf(
- FinalReductionResult,
- [](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); });
+ OrigExitingVPV->replaceUsesWithIf(FinalReductionResult, [](VPUser &User,
+ unsigned) {
+ return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue(),
+ m_VPValue()));
+ });
}
VPlanTransforms::clearReductionWrapFlags(*Plan);
@@ -9828,21 +9958,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- // Plan how to best vectorize, return the best VF and its cost.
- std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
+ // Plan how to best vectorize.
+ LVP.plan(UserVF, UserIC);
+ VectorizationFactor VF = LVP.computeBestVF();
+ unsigned IC = 1;
if (ORE->allowExtraAnalysis(LV_NAME))
LVP.emitInvalidCostRemarks(ORE);
- VectorizationFactor VF = VectorizationFactor::Disabled();
- unsigned IC = 1;
-
bool AddBranchWeights =
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getDataLayout(), AddBranchWeights);
- if (MaybeVF) {
- VF = *MaybeVF;
+ if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
@@ -9882,7 +10010,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!MaybeVF && UserIC > 1) {
+ if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -9964,11 +10092,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);
- ElementCount BestVF = LVP.computeBestVF();
- assert(BestVF.isScalar() &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
- LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -9979,20 +10104,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- ElementCount BestVF = LVP.computeBestVF();
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
- assert(VF.Width == BestVF &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
- LVP.selectEpilogueVectorizationFactor(BestVF, IC);
+ LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
if (EpilogueVF.Width.isVector()) {
// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
+ EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
@@ -10087,10 +10208,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
- LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there