summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp146
1 files changed, 138 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f972efa07eb7..16b1b539345d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <cassert>
#include <string>
@@ -55,6 +56,15 @@ namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
+/// @{
+/// Names of the loop metadata attributes passed to makeFollowupLoopID.
+const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
+const char LLVMLoopVectorizeFollowupVectorized[] =
+ "llvm.loop.vectorize.followup_vectorized";
+const char LLVMLoopVectorizeFollowupEpilogue[] =
+ "llvm.loop.vectorize.followup_epilogue";
+/// @}
+
extern cl::opt<unsigned> ForceTargetInstructionCost;
static cl::opt<bool> PrintVPlansInDotFormat(
@@ -143,7 +153,7 @@ template <typename T> static T *getPlanEntry(T *Start) {
for (unsigned i = 0; i < WorkList.size(); i++) {
T *Current = WorkList[i];
- if (Current->getNumPredecessors() == 0)
+ if (!Current->hasPredecessors())
return Current;
auto &Predecessors = Current->getPredecessors();
WorkList.insert_range(Predecessors);
@@ -216,7 +226,7 @@ bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
// If VPBB is in a region R, VPBB is a loop header if R is a loop region with
// VPBB as its entry, i.e., free of predecessors.
if (auto *R = VPBB->getParent())
- return !R->isReplicator() && VPBB->getNumPredecessors() == 0;
+ return !R->isReplicator() && !VPBB->hasPredecessors();
// A header dominates its second predecessor (the latch), with the other
// predecessor being the preheader
@@ -493,6 +503,9 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
void VPIRBasicBlock::execute(VPTransformState *State) {
assert(getHierarchicalSuccessors().size() <= 2 &&
"VPIRBasicBlock can have at most two successors at the moment!");
+ // Move completely disconnected blocks to their final position.
+ if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
+ IRBB->moveAfter(State->CFG.PrevBB);
State->Builder.SetInsertPoint(IRBB->getTerminator());
State->CFG.PrevBB = IRBB;
State->CFG.VPBB2IRBB[this] = IRBB;
@@ -809,7 +822,7 @@ InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
const VPBlockBase *Pred = nullptr;
- if (getNumPredecessors() > 0) {
+ if (hasPredecessors()) {
Pred = getPredecessors()[Idx];
} else {
auto *Region = getParent();
@@ -1183,14 +1196,14 @@ VPlan *VPlan::duplicate() {
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
VPIRBasicBlock *NewScalarHeader = nullptr;
- if (getScalarHeader()->getNumPredecessors() == 0) {
- NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
- } else {
+ if (getScalarHeader()->hasPredecessors()) {
NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
}));
+ } else {
+ NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
}
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
@@ -1473,7 +1486,7 @@ void VPSlotTracker::assignName(const VPValue *V) {
std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
// First assign the base name for V.
- const auto &[A, _] = VPValue2Name.insert({V, BaseName});
+ const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
// Integer or FP constants with different types will result in he same string
// due to stripping types.
if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
@@ -1481,7 +1494,7 @@ void VPSlotTracker::assignName(const VPValue *V) {
// If it is already used by C > 0 other VPValues, increase the version counter
// C and use it for V.
- const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
+ const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
if (!UseInserted) {
C->second++;
A->second = (BaseName + Twine(".") + Twine(C->second)).str();
@@ -1612,6 +1625,123 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
llvm_unreachable("No plan found!");
}
+/// Attach "llvm.loop.unroll.runtime.disable" to the loop ID of \p L, unless
+/// the loop ID already contains an "llvm.loop.unroll.disable" entry.
+///
+/// The loop ID is rebuilt from the existing one: every operand is carried
+/// over unchanged, except for an already present runtime-disable entry, which
+/// is filtered out so that the rebuilt loop ID holds it exactly once. If
+/// "llvm.loop.unroll.disable" is found, the loop ID of \p L is left untouched.
+static void addRuntimeUnrollDisableMetaData(Loop *L) {
+  SmallVector<Metadata *, 4> MDs;
+  // Reserve first location for self reference to the LoopID metadata node.
+  MDs.push_back(nullptr);
+  bool HasUnrollDisable = false;
+  if (MDNode *LoopID = L->getLoopID()) {
+    // Scan the existing operands (operand 0 is the self reference).
+    for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
+      Metadata *Op = LoopID->getOperand(I);
+      // Only a node whose first operand is a string can name an unroll hint.
+      // Anything else (non-node operands, empty nodes, nodes not starting
+      // with a string) is copied as-is rather than being dropped.
+      const auto *MD = dyn_cast<MDNode>(Op);
+      const MDString *S = nullptr;
+      if (MD && MD->getNumOperands() > 0)
+        S = dyn_cast<MDString>(MD->getOperand(0));
+      if (S) {
+        StringRef Name = S->getString();
+        // Skip an existing runtime-disable entry; it is re-added below.
+        if (Name.starts_with("llvm.loop.unroll.runtime.disable"))
+          continue;
+        // Accumulate instead of overwrite, so that a later unrelated entry
+        // cannot hide an earlier "llvm.loop.unroll.disable".
+        HasUnrollDisable |= Name.starts_with("llvm.loop.unroll.disable");
+      }
+      MDs.push_back(Op);
+    }
+  }
+
+  // Unrolling is already disabled altogether; nothing to add.
+  if (HasUnrollDisable)
+    return;
+
+  // Add runtime unroll disable metadata.
+  LLVMContext &Context = L->getHeader()->getContext();
+  SmallVector<Metadata *, 1> DisableOperands;
+  DisableOperands.push_back(
+      MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
+  MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+  MDs.push_back(DisableNode);
+  MDNode *NewLoopID = MDNode::get(Context, MDs);
+  // Set operand 0 to refer to the loop id itself.
+  NewLoopID->replaceOperandWith(0, NewLoopID);
+  L->setLoopID(NewLoopID);
+}
+
+/// Update the loop IDs of the original (scalar) loop and, if present, of
+/// \p VectorLoop, then set profile information for both loops.
+///
+/// \p HeaderVPBB is scanned for an ExplicitVectorLength VPInstruction when
+/// \p VectorLoop has no follow-up loop ID. \p VectorizingEpilogue suppresses
+/// the scalar loop update and forces runtime-unroll-disable metadata on
+/// \p VectorLoop. \p DisableRuntimeUnroll requests runtime-unroll-disable
+/// metadata on the scalar loop when it has no follow-up loop ID.
+/// \p EstimatedVFxUF is forwarded to setProfileInfoAfterUnrolling.
+void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
+    Loop *VectorLoop, VPBasicBlock *HeaderVPBB, bool VectorizingEpilogue,
+    unsigned EstimatedVFxUF, bool DisableRuntimeUnroll) {
+  MDNode *OrigLoopID = OrigLoop->getLoopID();
+
+  // The scalar loop's metadata is updated exactly once: the update is skipped
+  // while vectorizing the epilogue loop.
+  if (!VectorizingEpilogue) {
+    if (std::optional<MDNode *> EpilogueFollowupID = makeFollowupLoopID(
+            OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+                         LLVMLoopVectorizeFollowupEpilogue})) {
+      OrigLoop->setLoopID(*EpilogueFollowupID);
+    } else {
+      if (DisableRuntimeUnroll)
+        addRuntimeUnrollDisableMetaData(OrigLoop);
+
+      LoopVectorizeHints ScalarHints(OrigLoop, true, *ORE);
+      ScalarHints.setAlreadyVectorized();
+    }
+  }
+
+  // Without a vector loop there is nothing further to annotate.
+  if (!VectorLoop)
+    return;
+
+  std::optional<MDNode *> VectorFollowupID = makeFollowupLoopID(
+      OrigLoopID,
+      {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized});
+  if (VectorFollowupID) {
+    VectorLoop->setLoopID(*VectorFollowupID);
+  } else {
+    // Carry all loop hints of the original loop over to the vector loop; the
+    // vectorizer-specific hints are replaced below.
+    if (OrigLoopID)
+      VectorLoop->setLoopID(OrigLoopID);
+
+    if (!VectorizingEpilogue) {
+      LoopVectorizeHints VectorHints(VectorLoop, true, *ORE);
+      VectorHints.setAlreadyVectorized();
+    }
+
+    // The loop counts as EVL-vectorized if its header contains an
+    // ExplicitVectorLength VPInstruction; record that in the loop ID.
+    auto IsExplicitVectorLength = [](const VPRecipeBase &R) {
+      const auto *VPI = dyn_cast<VPInstruction>(&R);
+      return VPI && VPI->getOpcode() == VPInstruction::ExplicitVectorLength;
+    };
+    if (llvm::any_of(*HeaderVPBB, IsExplicitVectorLength)) {
+      LLVMContext &Ctx = VectorLoop->getHeader()->getContext();
+      MDNode *CurrentLoopID = VectorLoop->getLoopID();
+      auto *EVLMarker = MDNode::get(
+          Ctx,
+          {MDString::get(Ctx, "llvm.loop.isvectorized.tailfoldingstyle"),
+           MDString::get(Ctx, "evl")});
+      VectorLoop->setLoopID(
+          makePostTransformationMetadata(Ctx, CurrentLoopID, {}, {EVLMarker}));
+    }
+  }
+
+  // Disable runtime unrolling of the vector loop for epilogue vectorization,
+  // or when the unrolling preferences do not ask for unrolling of vectorized
+  // loops.
+  TargetTransformInfo::UnrollingPreferences UP;
+  TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
+  if (VectorizingEpilogue || !UP.UnrollVectorizedLoop)
+    addRuntimeUnrollDisableMetaData(VectorLoop);
+
+  // Set/update profile weights for the vector and remainder loops, since the
+  // original loop's iterations are now split between them. The original loop
+  // becomes the scalar remainder loop after vectorization.
+  //
+  // With foldTailByMasking() or requiresScalarEpilogue() the result may be
+  // slightly rough, which is acceptable because profile data is not precise
+  // to begin with. A possible bypass of the vector code due to legality
+  // checks is ignored; all weight is optimistically given to the vector loop.
+  //
+  // For scalable vectorization the number of original iterations covered by
+  // one vector iteration is unknown at compile time, so the value of vscale
+  // used for tuning is used instead.
+  setProfileInfoAfterUnrolling(OrigLoop, VectorLoop, OrigLoop, EstimatedVFxUF);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
if (VPlans.empty()) {