diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 146 |
1 files changed, 138 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f972efa07eb7..16b1b539345d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <cassert> #include <string> @@ -55,6 +56,15 @@ namespace llvm { extern cl::opt<bool> EnableVPlanNativePath; } +/// @{ +/// Metadata attribute names +const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all"; +const char LLVMLoopVectorizeFollowupVectorized[] = + "llvm.loop.vectorize.followup_vectorized"; +const char LLVMLoopVectorizeFollowupEpilogue[] = + "llvm.loop.vectorize.followup_epilogue"; +/// @} + extern cl::opt<unsigned> ForceTargetInstructionCost; static cl::opt<bool> PrintVPlansInDotFormat( @@ -143,7 +153,7 @@ template <typename T> static T *getPlanEntry(T *Start) { for (unsigned i = 0; i < WorkList.size(); i++) { T *Current = WorkList[i]; - if (Current->getNumPredecessors() == 0) + if (!Current->hasPredecessors()) return Current; auto &Predecessors = Current->getPredecessors(); WorkList.insert_range(Predecessors); @@ -216,7 +226,7 @@ bool VPBlockUtils::isHeader(const VPBlockBase *VPB, // If VPBB is in a region R, VPBB is a loop header if R is a loop region with // VPBB as its entry, i.e., free of predecessors. 
if (auto *R = VPBB->getParent()) - return !R->isReplicator() && VPBB->getNumPredecessors() == 0; + return !R->isReplicator() && !VPBB->hasPredecessors(); // A header dominates its second predecessor (the latch), with the other // predecessor being the preheader @@ -493,6 +503,9 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) { void VPIRBasicBlock::execute(VPTransformState *State) { assert(getHierarchicalSuccessors().size() <= 2 && "VPIRBasicBlock can have at most two successors at the moment!"); + // Move completely disconnected blocks to their final position. + if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB)) + IRBB->moveAfter(State->CFG.PrevBB); State->Builder.SetInsertPoint(IRBB->getTerminator()); State->CFG.PrevBB = IRBB; State->CFG.VPBB2IRBB[this] = IRBB; @@ -809,7 +822,7 @@ InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const { const VPBlockBase *Pred = nullptr; - if (getNumPredecessors() > 0) { + if (hasPredecessors()) { Pred = getPredecessors()[Idx]; } else { auto *Region = getParent(); @@ -1183,14 +1196,14 @@ VPlan *VPlan::duplicate() { BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); VPIRBasicBlock *NewScalarHeader = nullptr; - if (getScalarHeader()->getNumPredecessors() == 0) { - NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB); - } else { + if (getScalarHeader()->hasPredecessors()) { NewScalarHeader = cast<VPIRBasicBlock>(*find_if( vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) { auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB); return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; })); + } else { + NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB); } // Create VPlan, clone live-ins and remap operands in the cloned blocks. 
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader); @@ -1473,7 +1486,7 @@ void VPSlotTracker::assignName(const VPValue *V) { std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str(); // First assign the base name for V. - const auto &[A, _] = VPValue2Name.insert({V, BaseName}); + const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName); // Integer or FP constants with different types will result in the same string // due to stripping types. if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV)) @@ -1481,7 +1494,7 @@ void VPSlotTracker::assignName(const VPValue *V) { // If it is already used by C > 0 other VPValues, increase the version counter // C and use it for V. - const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0}); + const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0); if (!UseInserted) { C->second++; A->second = (BaseName + Twine(".") + Twine(C->second)).str(); @@ -1612,6 +1625,123 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const { llvm_unreachable("No plan found!"); } +static void addRuntimeUnrollDisableMetaData(Loop *L) { + SmallVector<Metadata *, 4> MDs; + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + bool IsUnrollMetadata = false; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + // First find existing loop unrolling disable metadata. + for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) { + auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I)); + if (MD) { + const auto *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + if (S->getString().starts_with("llvm.loop.unroll.runtime.disable")) + continue; + IsUnrollMetadata = + S->getString().starts_with("llvm.loop.unroll.disable"); + } + MDs.push_back(LoopID->getOperand(I)); + } + } + + if (!IsUnrollMetadata) { + // Add runtime unroll disable metadata. 
+ LLVMContext &Context = L->getHeader()->getContext(); + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back( + MDString::get(Context, "llvm.loop.unroll.runtime.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L->setLoopID(NewLoopID); + } +} + +void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo( + Loop *VectorLoop, VPBasicBlock *HeaderVPBB, bool VectorizingEpilogue, + unsigned EstimatedVFxUF, bool DisableRuntimeUnroll) { + MDNode *LID = OrigLoop->getLoopID(); + // Update the metadata of the scalar loop. Skip the update when vectorizing + // the epilogue loop, to ensure it is only updated once. + if (!VectorizingEpilogue) { + std::optional<MDNode *> RemainderLoopID = makeFollowupLoopID( + LID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue}); + if (RemainderLoopID) { + OrigLoop->setLoopID(*RemainderLoopID); + } else { + if (DisableRuntimeUnroll) + addRuntimeUnrollDisableMetaData(OrigLoop); + + LoopVectorizeHints Hints(OrigLoop, true, *ORE); + Hints.setAlreadyVectorized(); + } + } + + if (!VectorLoop) + return; + + if (std::optional<MDNode *> VectorizedLoopID = + makeFollowupLoopID(LID, {LLVMLoopVectorizeFollowupAll, + LLVMLoopVectorizeFollowupVectorized})) { + VectorLoop->setLoopID(*VectorizedLoopID); + } else { + // Keep all loop hints from the original loop on the vector loop (we'll + // replace the vectorizer-specific hints below). + if (LID) + VectorLoop->setLoopID(LID); + + if (!VectorizingEpilogue) { + LoopVectorizeHints Hints(VectorLoop, true, *ORE); + Hints.setAlreadyVectorized(); + } + + // Check if it's EVL-vectorized and mark the corresponding metadata. + bool IsEVLVectorized = + llvm::any_of(*HeaderVPBB, [](const VPRecipeBase &Recipe) { + // Looking for the ExplicitVectorLength VPInstruction. 
+ if (const auto *VI = dyn_cast<VPInstruction>(&Recipe)) + return VI->getOpcode() == VPInstruction::ExplicitVectorLength; + return false; + }); + if (IsEVLVectorized) { + LLVMContext &Context = VectorLoop->getHeader()->getContext(); + MDNode *LoopID = VectorLoop->getLoopID(); + auto *IsEVLVectorizedMD = MDNode::get( + Context, + {MDString::get(Context, "llvm.loop.isvectorized.tailfoldingstyle"), + MDString::get(Context, "evl")}); + MDNode *NewLoopID = makePostTransformationMetadata(Context, LoopID, {}, + {IsEVLVectorizedMD}); + VectorLoop->setLoopID(NewLoopID); + } + } + TargetTransformInfo::UnrollingPreferences UP; + TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE); + if (!UP.UnrollVectorizedLoop || VectorizingEpilogue) + addRuntimeUnrollDisableMetaData(VectorLoop); + + // Set/update profile weights for the vector and remainder loops as original + // loop iterations are now distributed among them. Note that original loop + // becomes the scalar remainder loop after vectorization. + // + // For cases like foldTailByMasking() and requiresScalarEpilogue() we may + // end up getting a slightly roughened result but that should be OK since + // profile is not inherently precise anyway. Note also possible bypass of + // vector code caused by legality checks is ignored, assigning all the weight + // to the vector loop, optimistically. + // + // For scalable vectorization we can't know at compile time how many + // iterations of the loop are handled in one vector iteration, so instead + // use the value of vscale used for tuning. + setProfileInfoAfterUnrolling(OrigLoop, VectorLoop, OrigLoop, EstimatedVFxUF); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LoopVectorizationPlanner::printPlans(raw_ostream &O) { if (VPlans.empty()) { |
