diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 231 |
1 files changed, 127 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ef5f6e22f822..77c08839dbfa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case Instruction::Or: case Instruction::ICmp: case Instruction::Select: + case VPInstruction::AnyOf: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: @@ -361,6 +362,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: + case VPInstruction::AnyOf: return true; default: return false; @@ -565,6 +567,9 @@ Value *VPInstruction::generate(VPTransformState &State) { if (Op != Instruction::ICmp && Op != Instruction::FCmp) ReducedPartRdx = Builder.CreateBinOp( (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); + else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) + ReducedPartRdx = + createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart); else ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); } @@ -573,7 +578,8 @@ Value *VPInstruction::generate(VPTransformState &State) { // Create the reduction after the loop. Note that inloop reductions create // the target reduction in the loop using a Reduction recipe. if ((State.VF.isVector() || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && + RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || + RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) && !PhiR->isInLoop()) { ReducedPartRdx = createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); @@ -615,8 +621,7 @@ Value *VPInstruction::generate(VPTransformState &State) { "can only generate first lane for PtrAdd"); Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *Addend = State.get(getOperand(1), VPLane(0)); - return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name) - : Builder.CreatePtrAdd(Ptr, Addend, Name); + return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } case VPInstruction::ResumePhi: { Value *IncomingFromVPlanPred = @@ -624,18 +629,22 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *IncomingFromOtherPreds = State.get(getOperand(1), /* IsScalar */ true); auto *NewPhi = - Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name); + Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name); BasicBlock *VPlanPred = State.CFG - .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())]; + .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])]; NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { - assert(OtherPred != VPlanPred && - "VPlan predecessors should not be connected yet"); + if (OtherPred == VPlanPred) + continue; NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); } return NewPhi; } + case VPInstruction::AnyOf: { + Value *A = State.get(getOperand(0)); + return Builder.CreateOrReduce(A); + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -644,7 +653,8 @@ Value *VPInstruction::generate(VPTransformState &State) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ComputeReductionResult; + getOpcode() == VPInstruction::ComputeReductionResult || + getOpcode() == VPInstruction::AnyOf; } bool VPInstruction::isSingleScalar() const { @@ -707,6 +717,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return false; case Instruction::ICmp: case Instruction::Select: + case Instruction::Or: case VPInstruction::PtrAdd: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); @@ -802,6 +813,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::PtrAdd: O << "ptradd"; break; + case VPInstruction::AnyOf: + O << "any-of"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -819,12 +833,13 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, void VPIRInstruction::execute(VPTransformState &State) { assert((isa<PHINode>(&I) || getNumOperands() == 0) && "Only PHINodes can have extra operands"); - if (getNumOperands() == 1) { - VPValue *ExitValue = getOperand(0); + for (const auto &[Idx, Op] : enumerate(operands())) { + VPValue *ExitValue = Op; auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor()); + VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; + auto *PredVPBB = Pred->getExitingBasicBlock(); BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. @@ -857,11 +872,13 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - assert(getNumOperands() == 1 && "can have at most 1 operand"); - O << " (extra operand: "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[0]->printAsOperand(O); + O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": "; + interleaveComma( + enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { + Op.value()->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[Op.index()]->printAsOperand(O); + }); O << ")"; } } @@ -950,7 +967,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { // Some intrinsics have a scalar argument - don't replace it with a // vector. Value *Arg; - if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) + if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(), + State.TTI)) Arg = State.get(I.value(), VPLane(0)); else Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); @@ -964,7 +982,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { Module *M = State.Builder.GetInsertBlock()->getModule(); Function *VectorF = Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl); - assert(VectorF && "Can't retrieve vector intrinsic."); + assert(VectorF && + "Can't retrieve vector intrinsic or vector-predication intrinsics."); auto *CI = cast_or_null<CallInst>(getUnderlyingValue()); SmallVector<OperandBundleDef, 1> OpBundles; @@ -1012,11 +1031,11 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF, Arguments.push_back(V); } - Type *RetTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); SmallVector<Type *> ParamTys; for (unsigned I = 0; I != getNumOperands(); ++I) ParamTys.push_back( - ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF)); + toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF)); // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst. FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(); @@ -1184,7 +1203,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, SelectInst *SI = cast<SelectInst>(getUnderlyingValue()); bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions(); Type *ScalarTy = Ctx.Types.inferScalarType(this); - Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; VPValue *Op0, *Op1; @@ -1254,8 +1273,12 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { getFastMathFlags().print(O); break; case OperationType::GEPOp: - if (GEPFlags.IsInBounds) + if (GEPFlags.isInBounds()) O << " inbounds"; + else if (GEPFlags.hasNoUnsignedSignedWrap()) + O << " nusw"; + if (GEPFlags.hasNoUnsignedWrap()) + O << " nuw"; break; case OperationType::NonNegOp: if (NonNegFlags.NonNeg) @@ -1361,7 +1384,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (Opcode) { case Instruction::FNeg: { - Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); return Ctx.TTI.getArithmeticInstrCost( Opcode, VectorTy, CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, @@ -1399,7 +1422,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue && getOperand(1)->isDefinedOutsideLoopRegions()) RHSInfo.Kind = TargetTransformInfo::OK_UniformValue; - Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); SmallVector<const Value *, 4> Operands; @@ -1412,13 +1435,13 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, } case Instruction::Freeze: { // This opcode is unknown. Assume that it is the same as 'mul'. - Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); } case Instruction::ICmp: case Instruction::FCmp: { Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); - Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); + Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), CostKind, {TTI::OK_AnyValue, TTI::OP_None}, @@ -1546,8 +1569,8 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, } auto *SrcTy = - cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(Operand), VF)); - auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF)); + cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF)); + auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF)); // Arm TTI will use the underlying instruction to determine the cost. return Ctx.TTI.getCastInstrCost( Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput, @@ -1559,7 +1582,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-CAST "; printAsOperand(O, SlotTracker); - O << " = " << Instruction::getOpcodeName(Opcode) << " "; + O << " = " << Instruction::getOpcodeName(Opcode); printFlags(O); printOperands(O, SlotTracker); O << " to " << *getResultType(); @@ -1572,10 +1595,10 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, } /// This function adds -/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) -/// to each vector element of Val. The sequence starts at StartIndex. +/// (0 * Step, 1 * Step, 2 * Step, ...) +/// to each vector element of Val. /// \p Opcode is relevant for FP induction variable. -static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, +static Value *getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder) { assert(VF.isVector() && "only vector VFs are supported"); @@ -1600,11 +1623,7 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, } Value *InitVec = Builder.CreateStepVector(InitVecValVTy); - // Splat the StartIdx - Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx); - if (STy->isIntegerTy()) { - InitVec = Builder.CreateAdd(InitVec, StartIdxSplat); Step = Builder.CreateVectorSplat(VLen, Step); assert(Step->getType() == Val->getType() && "Invalid step vec"); // FIXME: The newly created binary instructions should contain nsw/nuw @@ -1617,7 +1636,6 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && "Binary Opcode should be specified for FP induction"); InitVec = Builder.CreateUIToFP(InitVec, ValVTy); - InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat); Step = Builder.CreateVectorSplat(VLen, Step); Value *MulOp = Builder.CreateFMul(InitVec, Step); @@ -1638,12 +1656,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { const InductionDescriptor &ID = getInductionDescriptor(); TruncInst *Trunc = getTruncInst(); IRBuilderBase &Builder = State.Builder; - assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); + assert(getPHINode()->getType() == ID.getStartValue()->getType() && + "Types must match"); assert(State.VF.isVector() && "must have vector VF"); // The value from the original loop to which we are mapping the new induction // variable. - Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV; + Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode(); // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); @@ -1668,10 +1687,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); } - Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = getStepVector( - SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder); + Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(), + State.VF, State.Builder); // We create vector phi nodes for both integer and floating-point induction // variables. Here, we determine the kind of arithmetic we will perform. @@ -1711,14 +1729,14 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { // factor. The last of those goes into the PHI. PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); - VecInd->setDebugLoc(EntryVal->getDebugLoc()); + VecInd->setDebugLoc(getDebugLoc()); State.set(this, VecInd); Instruction *LastInduction = cast<Instruction>( Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next")); if (isa<TruncInst>(EntryVal)) State.addMetadata(LastInduction, EntryVal); - LastInduction->setDebugLoc(EntryVal->getDebugLoc()); + LastInduction->setDebugLoc(getDebugLoc()); VecInd->addIncoming(SteppedStart, VectorPH); // Add induction update using an incorrect block temporarily. The phi node @@ -1732,20 +1750,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-INDUCTION"; - if (getTruncInst()) { - O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; - O << " +\n" << Indent << "\" "; - getVPValue(0)->printAsOperand(O, SlotTracker); - } else - O << " " << VPlanIngredient(IV); - - O << ", "; - getStepValue()->printAsOperand(O, SlotTracker); + O << Indent; + printAsOperand(O, SlotTracker); + O << " = WIDEN-INDUCTION "; + printOperands(O, SlotTracker); - O << ", "; - getVFValue()->printAsOperand(O, SlotTracker); + if (auto *TI = getTruncInst()) + O << " (truncated to " << *TI->getType() << ")"; } #endif @@ -1896,9 +1907,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { for (unsigned I = 0, E = getNumOperands(); I != E; I++) Ops.push_back(State.get(getOperand(I), VPLane(0))); - auto *NewGEP = - State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], - ArrayRef(Ops).drop_front(), "", isInBounds()); + auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], + ArrayRef(Ops).drop_front(), "", + getGEPNoWrapFlags()); Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP); State.set(this, Splat); State.addMetadata(Splat, GEP); @@ -1924,7 +1935,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // Create the new GEP. Note that this GEP may be a scalar if VF == 1, // but it should be a vector, otherwise. auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, - Indices, "", isInBounds()); + Indices, "", getGEPNoWrapFlags()); assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); State.set(this, NewGEP); @@ -1975,9 +1986,10 @@ void VPReverseVectorPointerRecipe::execute(VPTransformState &State) { // LastLane = 1 - RunTimeVF Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); Value *Ptr = State.get(getOperand(0), VPLane(0)); - bool InBounds = isInBounds(); - Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); - ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds); + Value *ResultPtr = + Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags()); + ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", + getGEPNoWrapFlags()); State.set(this, ResultPtr, /*IsScalar*/ true); } @@ -1987,9 +1999,8 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent; printAsOperand(O, SlotTracker); - O << " = reverse-vector-pointer "; - if (isInBounds()) - O << "inbounds "; + O << " = reverse-vector-pointer"; + printFlags(O); printOperands(O, SlotTracker); } #endif @@ -2001,10 +2012,10 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) { Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false, CurrentPart, Builder); Value *Ptr = State.get(getOperand(0), VPLane(0)); - bool InBounds = isInBounds(); Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); - Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); + Value *ResultPtr = + Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags()); State.set(this, ResultPtr, /*IsScalar*/ true); } @@ -2066,8 +2077,8 @@ InstructionCost VPBlendRecipe::computeCost(ElementCount VF, if (vputils::onlyFirstLaneUsed(this)) return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind); - Type *ResultTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); - Type *CmpTy = ToVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); + Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); + Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); return (getNumIncomingValues() - 1) * Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); @@ -2104,6 +2115,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { // Propagate the fast-math flags carried by the underlying instruction. IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + State.setDebugLocFrom(getDebugLoc()); Value *NewVecOp = State.get(getVecOp()); if (VPValue *Cond = getCondOp()) { Value *NewCond = State.get(Cond, State.VF.isScalar()); @@ -2188,7 +2200,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { RecurKind RdxKind = RdxDesc.getRecurrenceKind(); Type *ElementTy = Ctx.Types.inferScalarType(this); - auto *VectorTy = cast<VectorType>(ToVectorTy(ElementTy, VF)); + auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF)); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; unsigned Opcode = RdxDesc.getOpcode(); @@ -2380,6 +2392,7 @@ InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF, } void VPPredInstPHIRecipe::execute(VPTransformState &State) { + State.setDebugLocFrom(getDebugLoc()); assert(State.Lane && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = cast<Instruction>(State.get(getOperand(0), *State.Lane)); @@ -2439,7 +2452,7 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF); + Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); unsigned AS = @@ -2586,7 +2599,7 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, // legacy model, it will always calculate the cost of mask. // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. - Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF); + Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); unsigned AS = @@ -2707,7 +2720,7 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, // legacy model, it will always calculate the cost of mask. // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. - Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF); + Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient)); unsigned AS = @@ -3075,7 +3088,7 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF, Type *ValTy = Ctx.Types.inferScalarType( getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx) : getStoredValues()[InsertPosIdx]); - auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF)); + auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF)); unsigned AS = getLoadStoreAddressSpace(InsertPos); enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; @@ -3111,31 +3124,14 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -bool VPCanonicalIVPHIRecipe::isCanonical( - InductionDescriptor::InductionKind Kind, VPValue *Start, - VPValue *Step) const { - // Must be an integer induction. - if (Kind != InductionDescriptor::IK_IntInduction) - return false; - // Start must match the start value of this canonical induction. - if (Start != getStartValue()) - return false; - - // If the step is defined by a recipe, it is not a ConstantInt. - if (Step->getDefiningRecipe()) - return false; - - ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue()); - return StepC && StepC->isOne(); -} - bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { return IsScalarAfterVectorization && (!IsScalable || vputils::onlyFirstLaneUsed(this)); } void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { - assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction && + assert(getInductionDescriptor().getKind() == + InductionDescriptor::IK_PtrInduction && "Not a pointer induction according to InductionDescriptor!"); assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() && "Unexpected type."); @@ -3160,6 +3156,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV->getIterator()); NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); + NewPointerPhi->setDebugLoc(getDebugLoc()); } else { // The recipe has been unrolled. In that case, fetch the single pointer phi // shared among all unrolled parts of the recipe. @@ -3170,8 +3167,8 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { // A pointer induction, performed by using a gep BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); - Value *ScalarStepValue = State.get(getOperand(1), VPLane(0)); - Type *PhiType = IndDesc.getStep()->getType(); + Value *ScalarStepValue = State.get(getStepValue(), VPLane(0)); + Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue()); Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); // Add induction update using an incorrect block temporarily. The phi node // will be fixed after VPlan execution. Note that at this point the latch @@ -3223,7 +3220,8 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, printAsOperand(O, SlotTracker); O << " = WIDEN-POINTER-INDUCTION "; getStartValue()->printAsOperand(O, SlotTracker); - O << ", " << *IndDesc.getStep(); + O << ", "; + getStepValue()->printAsOperand(O, SlotTracker); if (getNumOperands() == 4) { O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); @@ -3235,13 +3233,22 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.Lane && "cannot be used in per-lane"); + if (State.ExpandedSCEVs.contains(Expr)) { + // SCEV Expr has already been expanded, result must already be set. At the + // moment we have to execute the entry block twice (once before skeleton + // creation to get expanded SCEVs used by the skeleton and once during + // regular VPlan execution). + State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]); + assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] && + "Results must match"); + return; + } + const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); SCEVExpander Exp(SE, DL, "induction"); Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), &*State.Builder.GetInsertPoint()); - assert(!State.ExpandedSCEVs.contains(Expr) && - "Same SCEV expanded multiple times"); State.ExpandedSCEVs[Expr] = Res; State.set(this, Res, VPLane(0)); } @@ -3324,7 +3331,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, SmallVector<int> Mask(VF.getKnownMinValue()); std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1); Type *VectorTy = - ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice, cast<VectorType>(VectorTy), Mask, CostKind, @@ -3358,7 +3365,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { : VectorType::get(StartV->getType(), State.VF); BasicBlock *HeaderBB = State.CFG.PrevBB; - assert(State.CurrentVectorLoop->getHeader() == HeaderBB && + assert(State.CurrentParentLoop->getHeader() == HeaderBB && "recipe must be in the vector loop header"); auto *Phi = PHINode::Create(VecTy, 2, "vec.phi"); Phi->insertBefore(HeaderBB->getFirstInsertionPt()); @@ -3380,6 +3387,22 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { Builder.SetInsertPoint(VectorPH->getTerminator()); StartV = Iden = State.get(StartVPV); } + } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { + // [I|F]FindLastIV will use a sentinel value to initialize the reduction + // phi or the resume value from the main vector loop when vectorizing the + // epilogue loop. In the exit block, ComputeReductionResult will generate + // checks to verify if the reduction result is the sentinel value. If the + // result is the sentinel value, it will be corrected back to the start + // value. + // TODO: The sentinel value is not always necessary. When the start value is + // a constant, and smaller than the start value of the induction variable, + // the start value can be directly used to initialize the reduction phi. + Iden = StartV; + if (!ScalarPHI) { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden); + } } else { Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags()); @@ -3483,7 +3506,7 @@ void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPScalarPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Value *Start = State.get(getOperand(0), VPLane(0)); + Value *Start = State.get(getStartValue(), VPLane(0)); PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); Phi->addIncoming(Start, VectorPH); Phi->setDebugLoc(getDebugLoc()); @@ -3493,7 +3516,7 @@ void VPScalarPHIRecipe::execute(VPTransformState &State) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << "SCALAR-PHI"; + O << Indent << "SCALAR-PHI "; printAsOperand(O, SlotTracker); O << " = phi "; printOperands(O, SlotTracker); |
