summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 73
1 files changed, 47 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a1c6f7977885..d16700922aff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
const VPRecipeBase *R = &VPBB->back();
bool IsSwitch = isa<VPInstruction>(R) &&
cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
- bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
- match(R, m_BranchOnCond(m_VPValue())) ||
- match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
+ bool IsCondBranch =
+ isa<VPBranchOnMaskRecipe>(R) ||
+ match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()));
(void)IsCondBranch;
(void)IsSwitch;
if (VPBB->getNumSuccessors() == 2 ||
@@ -845,19 +845,10 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
if (VF.isScalable())
return InstructionCost::getInvalid();
- // First compute the cost of the conditionally executed recipes, followed by
- // account for the branching cost, except if the mask is a header mask or
- // uniform condition.
- using namespace llvm::VPlanPatternMatch;
+ // Compute and return the cost of the conditionally executed recipes.
+ assert(VF.isVector() && "Can only compute vector cost at the moment.");
VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
- InstructionCost ThenCost = Then->cost(VF, Ctx);
-
- // For the scalar case, we may not always execute the original predicated
- // block, Thus, scale the block's cost by the probability of executing it.
- if (VF.isScalar())
- return ThenCost / getPredBlockCostDivisor(Ctx.CostKind);
-
- return ThenCost;
+ return Then->cost(VF, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -977,24 +968,36 @@ void VPlan::execute(VPTransformState *State) {
// logic generic during VPlan execution.
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
- } else {
+ }
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
+ // Generate code for the VPlan, in parts of the vector skeleton, loop body and
+ // successor blocks including the middle, exit and scalar preheader blocks.
+ for (VPBlockBase *Block : RPOT)
+ Block->execute(State);
+
+ // If the original loop is unreachable, delete it and all its blocks.
+ if (!ScalarPhVPBB->hasPredecessors()) {
+ // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
+ // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
+ // IR instructions.
+ for (VPIRBasicBlock *EB : getExitBlocks()) {
+ for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
+ if (R.getNumOperands() == 1)
+ R.eraseFromParent();
+ }
+ }
+
Loop *OrigLoop =
State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
- // If the original loop is unreachable, we need to delete it.
auto Blocks = OrigLoop->getBlocksVector();
Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
for (auto *BB : Blocks)
State->LI->removeBlock(BB);
+ DeleteDeadBlocks(Blocks, &State->CFG.DTU);
State->LI->erase(OrigLoop);
}
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
- Entry);
- // Generate code for the VPlan, in parts of the vector skeleton, loop body and
- // successor blocks including the middle, exit and scalar preheader blocks.
- for (VPBlockBase *Block : RPOT)
- Block->execute(State);
-
State->CFG.DTU.flush();
VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
@@ -1750,6 +1753,16 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
+bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType, // True iff truncating *C to NarrowType's scalar width and extending it back reproduces *C.
+ TTI::PartialReductionExtendKind ExtKind) { // ExtKind selects sign- vs. zero-extension for the round trip.
+ APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits()); // Keep only the low bits that fit NarrowType's scalar width.
+ unsigned WideSize = C->getBitWidth(); // Extend back to C's own bit width so the comparison below is like-for-like.
+ APInt ExtendedVal = ExtKind == TTI::PR_SignExtend // Sign-extend only for PR_SignExtend;
+ ? TruncatedVal.sext(WideSize)
+ : TruncatedVal.zext(WideSize); // every other ExtKind value takes the zero-extend path.
+ return ExtendedVal == *C; // Equality means the narrow value plus the chosen extension yields the original constant.
+}
+
TargetTransformInfo::OperandValueInfo
VPCostContext::getOperandInfo(VPValue *V) const {
if (!V->isLiveIn())
@@ -1759,10 +1772,14 @@ VPCostContext::getOperandInfo(VPValue *V) const {
}
InstructionCost VPCostContext::getScalarizationOverhead(
- Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) {
+ Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
+ bool AlwaysIncludeReplicatingR) {
if (VF.isScalar())
return 0;
+ assert(!VF.isScalable() &&
+ "Scalarization overhead not supported for scalable vectors");
+
InstructionCost ScalarizationCost = 0;
// Compute the cost of scalarizing the result if needed.
if (!ResultTy->isVoidTy()) {
@@ -1779,7 +1796,11 @@ InstructionCost VPCostContext::getScalarizationOverhead(
SmallPtrSet<const VPValue *, 4> UniqueOperands;
SmallVector<Type *> Tys;
for (auto *Op : Operands) {
- if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
+ if (Op->isLiveIn() ||
+ (!AlwaysIncludeReplicatingR &&
+ isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) ||
+ (isa<VPReplicateRecipe>(Op) &&
+ cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
!UniqueOperands.insert(Op).second)
continue;
Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));