summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
author: Hassnaa Hamdi <hassnaa.hamdi@arm.com>, 2025-10-26 14:45:44 +0000
committer: GitHub <noreply@github.com>, 2025-10-26 14:45:44 +0000
commit: be29f0dd86d1b2ae98fbc2de2a2b1dcd974871f9 (patch)
tree: 064c030c1c1ca4746d62127ff3cd2ae0a5456f70 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent: 63b83ea213878acde020fc8923ee65f42727009e (diff)
[LV]: Improve accuracy of calculating remaining iterations of MainLoopVF (#156723)
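When the trip count (TC) and the main-loop VF differ in kind (one scalable, the other fixed), transform them into the same numerical space before computing the remaining iterations: a scalable TC is estimated by multiplying its known-minimum part by the vscale used for tuning (defaulting to 1), and a scalable VF is replaced by the estimated runtime VF.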
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 17
1 file changed, 15 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 335651628e22..facb0fabdf57 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4378,8 +4378,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
const SCEV *TC =
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
- RemainingIterations =
- SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ const SCEV *KnownMinTC;
+ bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
+ // Use versions of TC and VF in which both are either scalable or fixed.
+ if (ScalableTC == MainLoopVF.isScalable())
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ else if (ScalableTC) {
+ const SCEV *EstimatedTC = SE.getMulExpr(
+ KnownMinTC,
+ SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
+ RemainingIterations = SE.getURemExpr(
+ EstimatedTC, SE.getElementCount(TCType, MainLoopVF * IC));
+ } else
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
// No iterations left to process in the epilogue.
if (RemainingIterations->isZero())