Reapply "[VPlan] Compute cost of more replicating loads/stores in ::computeCost. (#160053)" (#162157)

This reverts commit f80c0baf058dbdc5 and 94eade61a02ae5. Recommit a small fix for targets using prefersVectorizedAddressing. Original message: Update VPReplicateRecipe::computeCost to compute costs of more replicating loads/stores. There are 2 cases that require extra checks to match the legacy cost model: 1. If the pointer is based on an induction, the legacy cost model passes its SCEV to getAddressComputationCost. In those cases, still fall back to the legacy cost. SCEV computations will be added as follow-up 2. If a load is used as part of an address of another load, the legacy cost model skips the scalarization overhead. Those cases are currently handled by a usedByLoadOrStore helper. Note that getScalarizationOverhead also needs updating, because when the legacy cost model computes the scalarization overhead, scalars have not been collected yet, so we can't each for replicating recipes to skip their cost, except other loads. This again can be further improved by modeling inserts/extracts explicitly and consistently, and compute costs for those operations directly where needed. PR: https://github.com/llvm/llvm-project/pull/160053
author: Florian Hahn <flo@fhahn.com> 2025-10-06 22:16:08 +0100
committer: GitHub <noreply@github.com> 2025-10-06 22:16:08 +0100
commit: 74af5784a570914271a258618bf6f7cb06c50241 (patch)
tree: a90a4425e1bcf342595ebaee050875fd57f1f6a0 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent: c830c843ab249940811f0bb15924ec94a7b01538 (diff)
1 files changed, 10 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d393a9c1506d..cee08ef94aeb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3903,7 +3903,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
       if (VF.isScalar())
         continue;
 
-      VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
+      VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
+                            *CM.PSE.getSE());
       precomputeCosts(*Plan, VF, CostCtx);
       auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
       for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4160,7 +4161,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
 
       // Add on other costs that are modelled in VPlan, but not in the legacy
       // cost model.
-      VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
+      VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
+                            *CM.PSE.getSE());
       VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
       assert(VectorRegion && "Expected to have a vector region!");
       for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6852,7 +6854,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
 
 InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
                                                ElementCount VF) const {
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
   InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
 
   // Now compute and add the VPlan-based cost.
@@ -7085,7 +7087,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // simplifications not accounted for in the legacy cost model. If that's the
   // case, don't trigger the assertion, as the extra simplifications may cause a
   // different VF to be picked by the VPlan-based cost model.
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
+                        *CM.PSE.getSE());
   precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
   // Verify that the VPlan-based and legacy cost models agree, except for VPlans
   // with early exits and plans with additional VPlan simplifications. The
@@ -8418,7 +8421,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // TODO: Enable following transform when the EVL-version of extended-reduction
   // and mulacc-reduction are implemented.
   if (!CM.foldTailWithEVL()) {
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
+                          *CM.PSE.getSE());
     VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
                              CostCtx, Range);
   }
@@ -9874,7 +9878,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     bool ForceVectorization =
         Hints.getForce() == LoopVectorizeHints::FK_Enabled;
     VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
-                          CM.CostKind);
+                          CM.CostKind, *CM.PSE.getSE());
     if (!ForceVectorization &&
         !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
                                      LVP.getPlanFor(VF.Width), SEL,
author	Florian Hahn <flo@fhahn.com>	2025-10-06 22:16:08 +0100
committer	GitHub <noreply@github.com>	2025-10-06 22:16:08 +0100
commit	74af5784a570914271a258618bf6f7cb06c50241 (patch)
tree	a90a4425e1bcf342595ebaee050875fd57f1f6a0 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent	c830c843ab249940811f0bb15924ec94a7b01538 (diff)