diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 136 |
1 files changed, 83 insertions, 53 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e060e7081042..48cf763fa398 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, auto *Exiting = Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); VPRegionBlock *Region = - Plan.createVPRegionBlock(Entry, Exiting, RegionName, true); + Plan.createReplicateRegion(Entry, Exiting, RegionName); // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. @@ -1478,11 +1478,8 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, if (!Plan.getVectorLoopRegion()) return false; - if (!Plan.getTripCount()->isLiveIn()) - return false; - auto *TC = dyn_cast_if_present<ConstantInt>( - Plan.getTripCount()->getUnderlyingValue()); - if (!TC || !BestVF.isFixed()) + const APInt *TC; + if (!BestVF.isFixed() || !match(Plan.getTripCount(), m_APInt(TC))) return false; // Calculate the minimum power-of-2 bit width that can fit the known TC, VF @@ -1495,7 +1492,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, return std::max<unsigned>(PowerOf2Ceil(MaxVal.getActiveBits()), 8); }; unsigned NewBitWidth = - ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF); + ComputeBitWidth(*TC, BestVF.getKnownMinValue() * BestUF); LLVMContext &Ctx = Plan.getContext(); auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth); @@ -2092,8 +2089,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { // Recipes in replicate regions implicitly depend on predicate. If either // recipe is in a replicate region, only consider them equal if both have // the same parent. - const VPRegionBlock *RegionL = L->getParent()->getParent(); - const VPRegionBlock *RegionR = R->getParent()->getParent(); + const VPRegionBlock *RegionL = L->getRegion(); + const VPRegionBlock *RegionR = R->getRegion(); if (((RegionL && RegionL->isReplicator()) || (RegionR && RegionR->isReplicator())) && L->getParent() != R->getParent()) @@ -3867,8 +3864,7 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { // required lanes implicitly. // TODO: Remove once replicate regions are unrolled completely. auto IsCandidateUnpackUser = [Def](VPUser *U) { - VPRegionBlock *ParentRegion = - cast<VPRecipeBase>(U)->getParent()->getParent(); + VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion(); return U->usesScalars(Def) && (!ParentRegion || !ParentRegion->isReplicator()); }; @@ -3960,6 +3956,9 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, // used. // TODO: Assert that they aren't used. + VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); + Plan.getSymbolicUF().replaceAllUsesWith(UF); + // If there are no users of the runtime VF, compute VFxUF by constant folding // the multiplication of VF and UF. if (VF.getNumUsers() == 0) { @@ -3979,7 +3978,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, } VF.replaceAllUsesWith(RuntimeVF); - VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF}); VFxUF.replaceAllUsesWith(MulByUF); } @@ -4047,14 +4045,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, return false; } -/// Returns true if \p IR is a full interleave group with factor and number of -/// members both equal to \p VF. The interleave group must also access the full -/// vector width \p VectorRegWidth. -static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, - unsigned VF, VPTypeAnalysis &TypeInfo, - unsigned VectorRegWidth) { - if (!InterleaveR) - return false; +/// Returns VF from \p VFs if \p IR is a full interleave group with factor and +/// number of members both equal to VF. The interleave group must also access +/// the full vector width. +static std::optional<ElementCount> isConsecutiveInterleaveGroup( + VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs, + VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) { + if (!InterleaveR || InterleaveR->getMask()) + return std::nullopt; Type *GroupElementTy = nullptr; if (InterleaveR->getStoredValues().empty()) { @@ -4063,7 +4061,7 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return false; + return std::nullopt; } else { GroupElementTy = TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]); @@ -4071,13 +4069,27 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return false; + return std::nullopt; } - unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF; - auto IG = InterleaveR->getInterleaveGroup(); - return IG->getFactor() == VF && IG->getNumMembers() == VF && - GroupSize == VectorRegWidth; + auto GetVectorWidthForVF = [&TTI](ElementCount VF) { + TypeSize Size = TTI.getRegisterBitWidth( + VF.isFixed() ? TargetTransformInfo::RGK_FixedWidthVector + : TargetTransformInfo::RGK_ScalableVector); + assert(Size.isScalable() == VF.isScalable() && + "if Size is scalable, VF must to and vice versa"); + return Size.getKnownMinValue(); + }; + + for (ElementCount VF : VFs) { + unsigned MinVal = VF.getKnownMinValue(); + unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal; + auto IG = InterleaveR->getInterleaveGroup(); + if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal && + GroupSize == GetVectorWidthForVF(VF)) + return {VF}; + } + return std::nullopt; } /// Returns true if \p VPValue is a narrow VPValue. @@ -4088,16 +4100,18 @@ static bool isAlreadyNarrow(VPValue *VPV) { return RepR && RepR->isSingleScalar(); } -void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, - unsigned VectorRegWidth) { +std::unique_ptr<VPlan> +VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, + const TargetTransformInfo &TTI) { + using namespace llvm::VPlanPatternMatch; VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); + if (!VectorLoop) - return; + return nullptr; VPTypeAnalysis TypeInfo(Plan); - - unsigned VFMinVal = VF.getKnownMinValue(); SmallVector<VPInterleaveRecipe *> StoreGroups; + std::optional<ElementCount> VFToOptimize; for (auto &R : *VectorLoop->getEntryBasicBlock()) { if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount())) continue; @@ -4111,30 +4125,33 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, // * recipes writing to memory except interleave groups // Only support plans with a canonical induction phi. if (R.isPhi()) - return; + return nullptr; auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R); if (R.mayWriteToMemory() && !InterleaveR) - return; - - // Do not narrow interleave groups if there are VectorPointer recipes and - // the plan was unrolled. The recipe implicitly uses VF from - // VPTransformState. - // TODO: Remove restriction once the VF for the VectorPointer offset is - // modeled explicitly as operand. - if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1) - return; + return nullptr; // All other ops are allowed, but we reject uses that cannot be converted // when checking all allowed consumers (store interleave groups) below. if (!InterleaveR) continue; - // Bail out on non-consecutive interleave groups. - if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo, - VectorRegWidth)) - return; - + // Try to find a single VF, where all interleave groups are consecutive and + // saturate the full vector width. If we already have a candidate VF, check + // if it is applicable for the current InterleaveR, otherwise look for a + // suitable VF across the Plans VFs. + // + if (VFToOptimize) { + if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo, + TTI)) + return nullptr; + } else { + if (auto VF = isConsecutiveInterleaveGroup( + InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI)) + VFToOptimize = *VF; + else + return nullptr; + } // Skip read interleave groups. if (InterleaveR->getStoredValues().empty()) continue; @@ -4168,24 +4185,34 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>( InterleaveR->getStoredValues()[0]->getDefiningRecipe()); if (!WideMember0) - return; + return nullptr; for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) { auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe()); if (!R || R->getOpcode() != WideMember0->getOpcode() || R->getNumOperands() > 2) - return; + return nullptr; if (any_of(enumerate(R->operands()), [WideMember0, Idx = I](const auto &P) { const auto &[OpIdx, OpV] = P; return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx); })) - return; + return nullptr; } StoreGroups.push_back(InterleaveR); } if (StoreGroups.empty()) - return; + return nullptr; + + // All interleave groups in Plan can be narrowed for VFToOptimize. Split the + // original Plan into 2: a) a new clone which contains all VFs of Plan, except + // VFToOptimize, and b) the original Plan with VFToOptimize as single VF. + std::unique_ptr<VPlan> NewPlan; + if (size(Plan.vectorFactors()) != 1) { + NewPlan = std::unique_ptr<VPlan>(Plan.duplicate()); + Plan.setVF(*VFToOptimize); + NewPlan->removeVF(*VFToOptimize); + } // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe. SmallPtrSet<VPValue *, 4> NarrowedOps; @@ -4256,9 +4283,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue()); VPBuilder PHBuilder(Plan.getVectorPreheader()); - VPValue *UF = Plan.getOrAddLiveIn( - ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF())); - if (VF.isScalable()) { + VPValue *UF = &Plan.getSymbolicUF(); + if (VFToOptimize->isScalable()) { VPValue *VScale = PHBuilder.createElementCount( CanIV->getScalarType(), ElementCount::getScalable(1)); VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF}); @@ -4270,6 +4296,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1))); } removeDeadRecipes(Plan); + assert(none_of(*VectorLoop->getEntryBasicBlock(), + IsaPred<VPVectorPointerRecipe>) && + "All VPVectorPointerRecipes should have been removed"); + return NewPlan; } /// Add branch weight metadata, if the \p Plan's middle block is terminated by a |
