diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 73 |
1 files changed, 47 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a1c6f7977885..d16700922aff 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { const VPRecipeBase *R = &VPBB->back(); bool IsSwitch = isa<VPInstruction>(R) && cast<VPInstruction>(R)->getOpcode() == Instruction::Switch; - bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || - match(R, m_BranchOnCond(m_VPValue())) || - match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); + bool IsCondBranch = + isa<VPBranchOnMaskRecipe>(R) || + match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())); (void)IsCondBranch; (void)IsSwitch; if (VPBB->getNumSuccessors() == 2 || @@ -845,19 +845,10 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { if (VF.isScalable()) return InstructionCost::getInvalid(); - // First compute the cost of the conditionally executed recipes, followed by - // account for the branching cost, except if the mask is a header mask or - // uniform condition. - using namespace llvm::VPlanPatternMatch; + // Compute and return the cost of the conditionally executed recipes. + assert(VF.isVector() && "Can only compute vector cost at the moment."); VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]); - InstructionCost ThenCost = Then->cost(VF, Ctx); - - // For the scalar case, we may not always execute the original predicated - // block, Thus, scale the block's cost by the probability of executing it. - if (VF.isScalar()) - return ThenCost / getPredBlockCostDivisor(Ctx.CostKind); - - return ThenCost; + return Then->cost(VF, Ctx); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -977,24 +968,36 @@ void VPlan::execute(VPTransformState *State) { // logic generic during VPlan execution. State->CFG.DTU.applyUpdates( {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}}); - } else { + } + ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( + Entry); + // Generate code for the VPlan, in parts of the vector skeleton, loop body and + // successor blocks including the middle, exit and scalar preheader blocks. + for (VPBlockBase *Block : RPOT) + Block->execute(State); + + // If the original loop is unreachable, delete it and all its blocks. + if (!ScalarPhVPBB->hasPredecessors()) { + // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit + // VPIRBBs in VPlan as well, otherwise we would retain references to deleted + // IR instructions. + for (VPIRBasicBlock *EB : getExitBlocks()) { + for (VPRecipeBase &R : make_early_inc_range(EB->phis())) { + if (R.getNumOperands() == 1) + R.eraseFromParent(); + } + } + Loop *OrigLoop = State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock()); - // If the original loop is unreachable, we need to delete it. auto Blocks = OrigLoop->getBlocksVector(); Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock()); for (auto *BB : Blocks) State->LI->removeBlock(BB); + DeleteDeadBlocks(Blocks, &State->CFG.DTU); State->LI->erase(OrigLoop); } - ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( - Entry); - // Generate code for the VPlan, in parts of the vector skeleton, loop body and - // successor blocks including the middle, exit and scalar preheader blocks. - for (VPBlockBase *Block : RPOT) - Block->execute(State); - State->CFG.DTU.flush(); VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT); @@ -1750,6 +1753,16 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) { } #endif +bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType, + TTI::PartialReductionExtendKind ExtKind) { + APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits()); + unsigned WideSize = C->getBitWidth(); + APInt ExtendedVal = ExtKind == TTI::PR_SignExtend + ? TruncatedVal.sext(WideSize) + : TruncatedVal.zext(WideSize); + return ExtendedVal == *C; +} + TargetTransformInfo::OperandValueInfo VPCostContext::getOperandInfo(VPValue *V) const { if (!V->isLiveIn()) @@ -1759,10 +1772,14 @@ VPCostContext::getOperandInfo(VPValue *V) const { } InstructionCost VPCostContext::getScalarizationOverhead( - Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) { + Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF, + bool AlwaysIncludeReplicatingR) { if (VF.isScalar()) return 0; + assert(!VF.isScalable() && + "Scalarization overhead not supported for scalable vectors"); + InstructionCost ScalarizationCost = 0; // Compute the cost of scalarizing the result if needed. if (!ResultTy->isVoidTy()) { @@ -1779,7 +1796,11 @@ InstructionCost VPCostContext::getScalarizationOverhead( SmallPtrSet<const VPValue *, 4> UniqueOperands; SmallVector<Type *> Tys; for (auto *Op : Operands) { - if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) || + if (Op->isLiveIn() || + (!AlwaysIncludeReplicatingR && + isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) || + (isa<VPReplicateRecipe>(Op) && + cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) || !UniqueOperands.insert(Op).second) continue; Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF)); |
