diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 301 |
1 files changed, 211 insertions, 90 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 86e50a7f9143..2145bb8c9ca8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, return false; } +#ifndef NDEBUG VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1)); LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); @@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { InstructionCost C = CM.expectedCost(VF); VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost); -#ifndef NDEBUG unsigned AssumedMinimumVscale = getVScaleForTuning(OrigLoop, TTI).value_or(1); unsigned Width = @@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of " << AssumedMinimumVscale << ")"); LLVM_DEBUG(dbgs() << ".\n"); -#endif if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) { LLVM_DEBUG( @@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n"); return ChosenFactor; } +#endif bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization( ElementCount VF) const { @@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { return VectorizationFactor::Disabled(); } -std::optional<VectorizationFactor> -LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { +void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(OrigLoop->isInnermost() && "Inner loop expected."); CM.collectValuesToIgnore(); CM.collectElementTypesForWidening(); FixedScalableVFPair MaxFactors = 
CM.computeMaxVF(UserVF, UserIC); if (!MaxFactors) // Cases that should not to be vectorized nor interleaved. - return std::nullopt; + return; // Invalidate interleave groups if all blocks of loop will be predicated. if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) && @@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { if (CM.selectUserVectorizationFactor(UserVF)) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); buildVPlansWithVPRecipes(UserVF, UserVF); - if (!hasPlanWithVF(UserVF)) { - LLVM_DEBUG(dbgs() - << "LV: No VPlan could be built for " << UserVF << ".\n"); - return std::nullopt; - } - LLVM_DEBUG(printPlans(dbgs())); - return {{UserVF, 0, 0}}; + return; } else reportVectorizationInfo("UserVF ignored because of invalid costs.", "InvalidCost", ORE, OrigLoop); @@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF); LLVM_DEBUG(printPlans(dbgs())); - if (VPlans.empty()) - return std::nullopt; - if (all_of(VPlans, - [](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); })) - return VectorizationFactor::Disabled(); - - // Select the optimal vectorization factor according to the legacy cost-model. - // This is now only used to verify the decisions by the new VPlan-based - // cost-model and will be retired once the VPlan-based cost-model is - // stabilized. 
- VectorizationFactor VF = selectVectorizationFactor(); - assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); - if (!hasPlanWithVF(VF.Width)) { - LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width - << ".\n"); - return std::nullopt; - } - return VF; } InstructionCost VPCostContext::getLegacyCost(Instruction *UI, @@ -7255,11 +7230,13 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, return Cost; } -ElementCount LoopVectorizationPlanner::computeBestVF() { +VectorizationFactor LoopVectorizationPlanner::computeBestVF() { + if (VPlans.empty()) + return VectorizationFactor::Disabled(); // If there is a single VPlan with a single VF, return it directly. VPlan &FirstPlan = *VPlans[0]; if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) - return *FirstPlan.vectorFactors().begin(); + return {*FirstPlan.vectorFactors().begin(), 0, 0}; ElementCount ScalarVF = ElementCount::getFixed(1); assert(hasPlanWithVF(ScalarVF) && @@ -7267,6 +7244,7 @@ ElementCount LoopVectorizationPlanner::computeBestVF() { // TODO: Compute scalar cost using VPlan-based cost model. InstructionCost ScalarCost = CM.expectedCost(ScalarVF); + LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n"); VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost); VectorizationFactor BestFactor = ScalarFactor; @@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() { ProfitableVFs.push_back(CurrentFactor); } } - return BestFactor.Width; + +#ifndef NDEBUG + // Select the optimal vectorization factor according to the legacy cost-model. + // This is now only used to verify the decisions by the new VPlan-based + // cost-model and will be retired once the VPlan-based cost-model is + // stabilized. 
+ VectorizationFactor LegacyVF = selectVectorizationFactor(); + assert(BestFactor.Width == LegacyVF.Width && + " VPlan cost model and legacy cost model disagreed"); + assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) && + "when vectorizing, the scalar cost must be computed."); +#endif + + return BestFactor; } static void AddRuntimeUnrollDisableMetaData(Loop *L) { @@ -8158,8 +8149,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, // builder. At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. - // TODO: At the moment the first mask is always skipped, but it would be - // better to skip the most expensive mask. SmallVector<VPValue *, 2> OperandsWithMask; for (unsigned In = 0; In < NumIncoming; In++) { @@ -8172,8 +8161,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, "Distinct incoming values with one having a full mask"); break; } - if (In == 0) - continue; OperandsWithMask.push_back(EdgeMask); } return new VPBlendRecipe(Phi, OperandsWithMask); @@ -8527,9 +8514,11 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the -// original exit block. -static void addUsersInExitBlock( +// Collect (ExitPhi, ExitingValue) pairs for phis in the original exit block that +// are modeled in VPlan. Some exiting values are not modeled explicitly yet and +// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe, +// VPWidenPointerInductionRecipe and induction increments. 
+static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector<PHINode *, InductionDescriptor> &Inductions) { auto MiddleVPBB = @@ -8538,9 +8527,8 @@ static void addUsersInExitBlock( // and there is nothing to fix from vector loop; phis should have incoming // from scalar loop only. if (MiddleVPBB->getNumSuccessors() != 2) - return; - - // Introduce VPUsers modeling the exit values. + return {}; + MapVector<PHINode *, VPValue *> ExitingValuesToFix; BasicBlock *ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); @@ -8561,15 +8549,52 @@ static void addUsersInExitBlock( return P && Inductions.contains(P); }))) continue; - Plan.addLiveOut(&ExitPhi, V); + ExitingValuesToFix.insert({&ExitPhi, V}); } + return ExitingValuesToFix; } -/// Feed a resume value for every FOR from the vector loop to the scalar loop, -/// if middle block branches to scalar preheader, by introducing ExtractFromEnd -/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the -/// latter and corresponds to the scalar header. -static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { +// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry +// in \p ExitingValuesToFix. +static void +addUsersInExitBlock(VPlan &Plan, + MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { + if (ExitingValuesToFix.empty()) + return; + + auto MiddleVPBB = + cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); + BasicBlock *ExitBB = + cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); + // TODO: set B to MiddleVPBB->getFirstNonPhi(), taking care of affected tests. 
+ VPBuilder B(MiddleVPBB); + if (auto *Terminator = MiddleVPBB->getTerminator()) { + auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0)); + assert((!Condition || Condition->getParent() == MiddleVPBB) && + "Condition expected in MiddleVPBB"); + B.setInsertPoint(Condition ? Condition : Terminator); + } + + // Introduce VPUsers modeling the exit values. + for (const auto &[ExitPhi, V] : ExitingValuesToFix) { + VPValue *Ext = B.createNaryOp( + VPInstruction::ExtractFromEnd, + {V, Plan.getOrAddLiveIn(ConstantInt::get( + IntegerType::get(ExitBB->getContext(), 32), 1))}); + Plan.addLiveOut(ExitPhi, Ext); + } +} + +/// Handle live-outs for first-order recurrences, both in the scalar preheader +/// and the original exit block: +/// 1. Feed a resume value for every FOR from the vector loop to the scalar +/// loop, if middle block branches to scalar preheader, by introducing +/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a +/// VPLiveOut which uses the latter and corresponds to the scalar header. +/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in +/// the original exit block using a VPLiveOut. +static void addLiveOutsForFirstOrderRecurrences( + VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); // Start by finding out if middle block branches to scalar preheader, which is @@ -8578,21 +8603,31 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { // TODO: Should be replaced by // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the // scalar region is modeled as well. 
- VPBasicBlock *ScalarPHVPBB = nullptr; auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor()); - for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) { - if (isa<VPIRBasicBlock>(Succ)) - continue; - assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?"); - ScalarPHVPBB = cast<VPBasicBlock>(Succ); + BasicBlock *ExitBB = nullptr; + VPBasicBlock *ScalarPHVPBB = nullptr; + if (MiddleVPBB->getNumSuccessors() == 2) { + // Order is strict: first is the exit block, second is the scalar preheader. + ExitBB = + cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); + ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]); + } else if (ExitingValuesToFix.empty()) { + ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()); + } else { + ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSingleSuccessor()) + ->getIRBasicBlock(); } - if (!ScalarPHVPBB) + if (!ScalarPHVPBB) { + assert(ExitingValuesToFix.empty() && + "missed inserting extracts for exiting values"); return; + } VPBuilder ScalarPHBuilder(ScalarPHVPBB); VPBuilder MiddleBuilder(MiddleVPBB); // Reset insert point so new recipes are inserted before terminator and // condition, if there is either the former or both. + // TODO: set MiddleBuilder to MiddleVPBB->getFirstNonPhi(). 
if (auto *Terminator = MiddleVPBB->getTerminator()) { auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0)); assert((!Condition || Condition->getParent() == MiddleVPBB) && @@ -8601,12 +8636,81 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { } VPValue *OneVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + VPValue *TwoVPV = Plan.getOrAddLiveIn( + ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2)); for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) { auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi); if (!FOR) continue; + // This is the second phase of vectorizing first-order recurrences, creating + // extract for users outside the loop. An overview of the transformation is + // described below. Suppose we have the following loop with some use after + // the loop of the last a[i-1], + // + // for (int i = 0; i < n; ++i) { + // t = a[i - 1]; + // b[i] = a[i] - t; + // } + // use t; + // + // There is a first-order recurrence on "a". For this loop, the shorthand + // scalar IR looks like: + // + // scalar.ph: + // s.init = a[-1] + // br scalar.body + // + // scalar.body: + // i = phi [0, scalar.ph], [i+1, scalar.body] + // s1 = phi [s.init, scalar.ph], [s2, scalar.body] + // s2 = a[i] + // b[i] = s2 - s1 + // br cond, scalar.body, exit.block + // + // exit.block: + // use = lcssa.phi [s1, scalar.body] + // + // In this example, s1 is a recurrence because its value depends on the + // previous iteration. In the first phase of vectorization, we created a + // VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts + // for users in the scalar preheader and exit block. 
+ // + // vector.ph: + // v_init = vector(..., ..., ..., a[-1]) + // br vector.body + // + // vector.body + // i = phi [0, vector.ph], [i+4, vector.body] + // v1 = phi [v_init, vector.ph], [v2, vector.body] + // v2 = a[i, i+1, i+2, i+3] + // b[i] = v2 - v1 + // // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2)) + // b[i, i+1, i+2, i+3] = v2 - v1 + // br cond, vector.body, middle.block + // + // middle.block: + // vector.recur.extract.for.phi = v2(2) + // vector.recur.extract = v2(3) + // br cond, scalar.ph, exit.block + // + // scalar.ph: + // scalar.recur.init = phi [vector.recur.extract, middle.block], + // [s.init, otherwise] + // br scalar.body + // + // scalar.body: + // i = phi [0, scalar.ph], [i+1, scalar.body] + // s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body] + // s2 = a[i] + // b[i] = s2 - s1 + // br cond, scalar.body, exit.block + // + // exit.block: + // lo = lcssa.phi [s1, scalar.body], + // [vector.recur.extract.for.phi, middle.block] + // // Extract the resume value and create a new VPLiveOut for it. auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), OneVPV}, @@ -8614,7 +8718,28 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {}, "scalar.recur.init"); - Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe); + auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr()); + Plan.addLiveOut(FORPhi, ResumePhiRecipe); + + // Now create VPLiveOuts for users in the exit block. + // Extract the penultimate value of the recurrence and add VPLiveOut + // users of the recurrence splice. + + // No edge from the middle block to the unique exit block has been inserted + // and there is nothing to fix from vector loop; phis should have incoming + // from scalar loop only. 
+ if (ExitingValuesToFix.empty()) + continue; + for (User *U : FORPhi->users()) { + auto *UI = cast<Instruction>(U); + if (UI->getParent() != ExitBB) + continue; + VPValue *Ext = MiddleBuilder.createNaryOp( + VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {}, + "vector.recur.extract.for.phi"); + Plan.addLiveOut(cast<PHINode>(UI), Ext); + ExitingValuesToFix.erase(cast<PHINode>(UI)); + } } } @@ -8769,16 +8894,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // After here, VPBB should not be used. VPBB = nullptr; - addUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan, - Legal->getInductionVars()); - assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) && !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - addLiveOutsForFirstOrderRecurrences(*Plan); + MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock( + OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); + + addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix); + addUsersInExitBlock(*Plan, ExitingValuesToFix); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -8931,6 +9057,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // iteration. The final value is selected by the final ComputeReductionResult. 
void LoopVectorizationPlanner::adjustRecipesForReductions( VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { + using namespace VPlanPatternMatch; VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock(); // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores @@ -8988,10 +9115,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( for (unsigned I = 0; I != Worklist.size(); ++I) { VPSingleDefRecipe *Cur = Worklist[I]; for (VPUser *U : Cur->users()) { - auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U); - if (!UserRecipe) { - assert(isa<VPLiveOut>(U) && - "U must either be a VPSingleDef or VPLiveOut"); + auto *UserRecipe = cast<VPSingleDefRecipe>(U); + if (!UserRecipe->getParent()->getEnclosingLoopRegion()) { + assert(match(U, m_Binary<VPInstruction::ExtractFromEnd>( + m_VPValue(), m_VPValue())) && + "U must be an ExtractFromEnd VPInstruction"); continue; } Worklist.insert(UserRecipe); @@ -9208,9 +9336,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( auto *FinalReductionResult = new VPInstruction( VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL); FinalReductionResult->insertBefore(*MiddleVPBB, IP); - OrigExitingVPV->replaceUsesWithIf( - FinalReductionResult, - [](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); }); + OrigExitingVPV->replaceUsesWithIf(FinalReductionResult, [](VPUser &User, + unsigned) { + return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue(), + m_VPValue())); + }); } VPlanTransforms::clearReductionWrapFlags(*Plan); @@ -9828,21 +9958,19 @@ bool LoopVectorizePass::processLoop(Loop *L) { ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); - // Plan how to best vectorize, return the best VF and its cost. - std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); + // Plan how to best vectorize. 
+ LVP.plan(UserVF, UserIC); + VectorizationFactor VF = LVP.computeBestVF(); + unsigned IC = 1; if (ORE->allowExtraAnalysis(LV_NAME)) LVP.emitInvalidCostRemarks(ORE); - VectorizationFactor VF = VectorizationFactor::Disabled(); - unsigned IC = 1; - bool AddBranchWeights = hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI, F->getDataLayout(), AddBranchWeights); - if (MaybeVF) { - VF = *MaybeVF; + if (LVP.hasPlanWithVF(VF.Width)) { // Select the interleave count. IC = CM.selectInterleaveCount(VF.Width, VF.Cost); @@ -9882,7 +10010,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizeLoop = false; } - if (!MaybeVF && UserIC > 1) { + if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { // Tell the user interleaving was avoided up-front, despite being explicitly // requested. LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and " @@ -9964,11 +10092,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, BFI, PSI, Checks); - ElementCount BestVF = LVP.computeBestVF(); - assert(BestVF.isScalar() && - "VPlan cost model and legacy cost model disagreed"); - VPlan &BestPlan = LVP.getPlanFor(BestVF); - LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false); + VPlan &BestPlan = LVP.getPlanFor(VF.Width); + LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -9979,20 +10104,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { // If we decided that it is *legal* to vectorize the loop, then do it. 
- ElementCount BestVF = LVP.computeBestVF(); - LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n"); - assert(VF.Width == BestVF && - "VPlan cost model and legacy cost model disagreed"); - VPlan &BestPlan = LVP.getPlanFor(BestVF); + VPlan &BestPlan = LVP.getPlanFor(VF.Width); // Consider vectorizing the epilogue too if it's profitable. VectorizationFactor EpilogueVF = - LVP.selectEpilogueVectorizationFactor(BestVF, IC); + LVP.selectEpilogueVectorizationFactor(VF.Width, IC); if (EpilogueVF.Width.isVector()) { // The first pass vectorizes the main loop and creates a scalar epilogue // to be vectorized by executing the plan (potentially with a different // factor) again shortly afterwards. - EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1); + EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); @@ -10087,10 +10208,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (!MainILV.areSafetyChecksAdded()) DisableRuntimeUnroll = true; } else { - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF, + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there |
