summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp231
1 files changed, 127 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ef5f6e22f822..77c08839dbfa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case Instruction::Or:
case Instruction::ICmp:
case Instruction::Select:
+ case VPInstruction::AnyOf:
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
@@ -361,6 +362,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
case VPInstruction::ExplicitVectorLength:
+ case VPInstruction::AnyOf:
return true;
default:
return false;
@@ -565,6 +567,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
+ else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
+ ReducedPartRdx =
+ createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -573,7 +578,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
ReducedPartRdx =
createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
@@ -615,8 +621,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Addend = State.get(getOperand(1), VPLane(0));
- return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
- : Builder.CreatePtrAdd(Ptr, Addend, Name);
+ return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
case VPInstruction::ResumePhi: {
Value *IncomingFromVPlanPred =
@@ -624,18 +629,22 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *IncomingFromOtherPreds =
State.get(getOperand(1), /* IsScalar */ true);
auto *NewPhi =
- Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
+ Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
BasicBlock *VPlanPred =
State.CFG
- .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
+ .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
- assert(OtherPred != VPlanPred &&
- "VPlan predecessors should not be connected yet");
+ if (OtherPred == VPlanPred)
+ continue;
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
}
return NewPhi;
}
+ case VPInstruction::AnyOf: {
+ Value *A = State.get(getOperand(0));
+ return Builder.CreateOrReduce(A);
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
@@ -644,7 +653,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
- getOpcode() == VPInstruction::ComputeReductionResult;
+ getOpcode() == VPInstruction::ComputeReductionResult ||
+ getOpcode() == VPInstruction::AnyOf;
}
bool VPInstruction::isSingleScalar() const {
@@ -707,6 +717,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return false;
case Instruction::ICmp:
case Instruction::Select:
+ case Instruction::Or:
case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
@@ -802,6 +813,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::PtrAdd:
O << "ptradd";
break;
+ case VPInstruction::AnyOf:
+ O << "any-of";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -819,12 +833,13 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
void VPIRInstruction::execute(VPTransformState &State) {
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
"Only PHINodes can have extra operands");
- if (getNumOperands() == 1) {
- VPValue *ExitValue = getOperand(0);
+ for (const auto &[Idx, Op] : enumerate(operands())) {
+ VPValue *ExitValue = Op;
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
? VPLane::getFirstLane()
: VPLane::getLastLaneForVF(State.VF);
- auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor());
+ VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
+ auto *PredVPBB = Pred->getExitingBasicBlock();
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
@@ -857,11 +872,13 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
O << Indent << "IR " << I;
if (getNumOperands() != 0) {
- assert(getNumOperands() == 1 && "can have at most 1 operand");
- O << " (extra operand: ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << " from ";
- getParent()->getPredecessors()[0]->printAsOperand(O);
+ O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
+ interleaveComma(
+ enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
+ Op.value()->printAsOperand(O, SlotTracker);
+ O << " from ";
+ getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
+ });
O << ")";
}
}
@@ -950,7 +967,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
Value *Arg;
- if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
+ if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
+ State.TTI))
Arg = State.get(I.value(), VPLane(0));
else
Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
@@ -964,7 +982,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
Module *M = State.Builder.GetInsertBlock()->getModule();
Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
- assert(VectorF && "Can't retrieve vector intrinsic.");
+ assert(VectorF &&
+ "Can't retrieve vector intrinsic or vector-predication intrinsics.");
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -1012,11 +1031,11 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
Arguments.push_back(V);
}
- Type *RetTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
SmallVector<Type *> ParamTys;
for (unsigned I = 0; I != getNumOperands(); ++I)
ParamTys.push_back(
- ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
+ toVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
@@ -1184,7 +1203,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
Type *ScalarTy = Ctx.Types.inferScalarType(this);
- Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
VPValue *Op0, *Op1;
@@ -1254,8 +1273,12 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
getFastMathFlags().print(O);
break;
case OperationType::GEPOp:
- if (GEPFlags.IsInBounds)
+ if (GEPFlags.isInBounds())
O << " inbounds";
+ else if (GEPFlags.hasNoUnsignedSignedWrap())
+ O << " nusw";
+ if (GEPFlags.hasNoUnsignedWrap())
+ O << " nuw";
break;
case OperationType::NonNegOp:
if (NonNegFlags.NonNeg)
@@ -1361,7 +1384,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
switch (Opcode) {
case Instruction::FNeg: {
- Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
@@ -1399,7 +1422,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
getOperand(1)->isDefinedOutsideLoopRegions())
RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
- Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
SmallVector<const Value *, 4> Operands;
@@ -1412,13 +1435,13 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
}
case Instruction::Freeze: {
// This opcode is unknown. Assume that it is the same as 'mul'.
- Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
- Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
CostKind,
{TTI::OK_AnyValue, TTI::OP_None},
@@ -1546,8 +1569,8 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
}
auto *SrcTy =
- cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(Operand), VF));
- auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
+ cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
+ auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
// Arm TTI will use the underlying instruction to determine the cost.
return Ctx.TTI.getCastInstrCost(
Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
@@ -1559,7 +1582,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ O << " = " << Instruction::getOpcodeName(Opcode);
printFlags(O);
printOperands(O, SlotTracker);
O << " to " << *getResultType();
@@ -1572,10 +1595,10 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
}
/// This function adds
-/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
-/// to each vector element of Val. The sequence starts at StartIndex.
+/// (0 * Step, 1 * Step, 2 * Step, ...)
+/// to each vector element of Val.
/// \p Opcode is relevant for FP induction variable.
-static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
+static Value *getStepVector(Value *Val, Value *Step,
Instruction::BinaryOps BinOp, ElementCount VF,
IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
@@ -1600,11 +1623,7 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
- // Splat the StartIdx
- Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
-
if (STy->isIntegerTy()) {
- InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw
@@ -1617,7 +1636,6 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
"Binary Opcode should be specified for FP induction");
InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
- InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
Value *MulOp = Builder.CreateFMul(InitVec, Step);
@@ -1638,12 +1656,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
const InductionDescriptor &ID = getInductionDescriptor();
TruncInst *Trunc = getTruncInst();
IRBuilderBase &Builder = State.Builder;
- assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
+ assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
+ "Types must match");
assert(State.VF.isVector() && "must have vector VF");
// The value from the original loop to which we are mapping the new induction
// variable.
- Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
+ Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
@@ -1668,10 +1687,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
- Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(
- SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
+ Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
+ State.VF, State.Builder);
// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1711,14 +1729,14 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
// factor. The last of those goes into the PHI.
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
- VecInd->setDebugLoc(EntryVal->getDebugLoc());
+ VecInd->setDebugLoc(getDebugLoc());
State.set(this, VecInd);
Instruction *LastInduction = cast<Instruction>(
Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
if (isa<TruncInst>(EntryVal))
State.addMetadata(LastInduction, EntryVal);
- LastInduction->setDebugLoc(EntryVal->getDebugLoc());
+ LastInduction->setDebugLoc(getDebugLoc());
VecInd->addIncoming(SteppedStart, VectorPH);
// Add induction update using an incorrect block temporarily. The phi node
@@ -1732,20 +1750,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-INDUCTION";
- if (getTruncInst()) {
- O << "\\l\"";
- O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
- O << " +\n" << Indent << "\" ";
- getVPValue(0)->printAsOperand(O, SlotTracker);
- } else
- O << " " << VPlanIngredient(IV);
-
- O << ", ";
- getStepValue()->printAsOperand(O, SlotTracker);
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-INDUCTION ";
+ printOperands(O, SlotTracker);
- O << ", ";
- getVFValue()->printAsOperand(O, SlotTracker);
+ if (auto *TI = getTruncInst())
+ O << " (truncated to " << *TI->getType() << ")";
}
#endif
@@ -1896,9 +1907,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned I = 0, E = getNumOperands(); I != E; I++)
Ops.push_back(State.get(getOperand(I), VPLane(0)));
- auto *NewGEP =
- State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
- ArrayRef(Ops).drop_front(), "", isInBounds());
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
+ ArrayRef(Ops).drop_front(), "",
+ getGEPNoWrapFlags());
Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
State.set(this, Splat);
State.addMetadata(Splat, GEP);
@@ -1924,7 +1935,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", isInBounds());
+ Indices, "", getGEPNoWrapFlags());
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP);
@@ -1975,9 +1986,10 @@ void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
- bool InBounds = isInBounds();
- Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+ Value *ResultPtr =
+ Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
+ getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -1987,9 +1999,8 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
- O << " = reverse-vector-pointer ";
- if (isInBounds())
- O << "inbounds ";
+ O << " = reverse-vector-pointer";
+ printFlags(O);
printOperands(O, SlotTracker);
}
#endif
@@ -2001,10 +2012,10 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
CurrentPart, Builder);
Value *Ptr = State.get(getOperand(0), VPLane(0));
- bool InBounds = isInBounds();
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
+ Value *ResultPtr =
+ Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -2066,8 +2077,8 @@ InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
if (vputils::onlyFirstLaneUsed(this))
return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
- Type *ResultTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
- Type *CmpTy = ToVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
+ Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
return (getNumIncomingValues() - 1) *
Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
@@ -2104,6 +2115,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+ State.setDebugLocFrom(getDebugLoc());
Value *NewVecOp = State.get(getVecOp());
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, State.VF.isScalar());
@@ -2188,7 +2200,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
RecurKind RdxKind = RdxDesc.getRecurrenceKind();
Type *ElementTy = Ctx.Types.inferScalarType(this);
- auto *VectorTy = cast<VectorType>(ToVectorTy(ElementTy, VF));
+ auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
unsigned Opcode = RdxDesc.getOpcode();
@@ -2380,6 +2392,7 @@ InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFrom(getDebugLoc());
assert(State.Lane && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
cast<Instruction>(State.get(getOperand(0), *State.Lane));
@@ -2439,7 +2452,7 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
- Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF);
+ Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
const Align Alignment =
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
unsigned AS =
@@ -2586,7 +2599,7 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
// legacy model, it will always calculate the cost of mask.
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
- Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF);
+ Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
const Align Alignment =
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
unsigned AS =
@@ -2707,7 +2720,7 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
// legacy model, it will always calculate the cost of mask.
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
- Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF);
+ Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
const Align Alignment =
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
unsigned AS =
@@ -3075,7 +3088,7 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
Type *ValTy = Ctx.Types.inferScalarType(
getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
: getStoredValues()[InsertPosIdx]);
- auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
+ auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
unsigned AS = getLoadStoreAddressSpace(InsertPos);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
@@ -3111,31 +3124,14 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-bool VPCanonicalIVPHIRecipe::isCanonical(
- InductionDescriptor::InductionKind Kind, VPValue *Start,
- VPValue *Step) const {
- // Must be an integer induction.
- if (Kind != InductionDescriptor::IK_IntInduction)
- return false;
- // Start must match the start value of this canonical induction.
- if (Start != getStartValue())
- return false;
-
- // If the step is defined by a recipe, it is not a ConstantInt.
- if (Step->getDefiningRecipe())
- return false;
-
- ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
- return StepC && StepC->isOne();
-}
-
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
return IsScalarAfterVectorization &&
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
- assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
+ assert(getInductionDescriptor().getKind() ==
+ InductionDescriptor::IK_PtrInduction &&
"Not a pointer induction according to InductionDescriptor!");
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
"Unexpected type.");
@@ -3160,6 +3156,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
CanonicalIV->getIterator());
NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
+ NewPointerPhi->setDebugLoc(getDebugLoc());
} else {
// The recipe has been unrolled. In that case, fetch the single pointer phi
// shared among all unrolled parts of the recipe.
@@ -3170,8 +3167,8 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
// A pointer induction, performed by using a gep
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
- Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
- Type *PhiType = IndDesc.getStep()->getType();
+ Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
+ Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
@@ -3223,7 +3220,8 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
- O << ", " << *IndDesc.getStep();
+ O << ", ";
+ getStepValue()->printAsOperand(O, SlotTracker);
if (getNumOperands() == 4) {
O << ", ";
getOperand(2)->printAsOperand(O, SlotTracker);
@@ -3235,13 +3233,22 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "cannot be used in per-lane");
+ if (State.ExpandedSCEVs.contains(Expr)) {
+ // SCEV Expr has already been expanded, result must already be set. At the
+ // moment we have to execute the entry block twice (once before skeleton
+ // creation to get expanded SCEVs used by the skeleton and once during
+ // regular VPlan execution).
+ State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
+ assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
+ "Results must match");
+ return;
+ }
+
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
- assert(!State.ExpandedSCEVs.contains(Expr) &&
- "Same SCEV expanded multiple times");
State.ExpandedSCEVs[Expr] = Res;
State.set(this, Res, VPLane(0));
}
@@ -3324,7 +3331,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
SmallVector<int> Mask(VF.getKnownMinValue());
std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
Type *VectorTy =
- ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+ toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
cast<VectorType>(VectorTy), Mask, CostKind,
@@ -3358,7 +3365,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
: VectorType::get(StartV->getType(), State.VF);
BasicBlock *HeaderBB = State.CFG.PrevBB;
- assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
+ assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
"recipe must be in the vector loop header");
auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
@@ -3380,6 +3387,22 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Builder.SetInsertPoint(VectorPH->getTerminator());
StartV = Iden = State.get(StartVPV);
}
+ } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+ // [I|F]FindLastIV will use a sentinel value to initialize the reduction
+ // phi or the resume value from the main vector loop when vectorizing the
+ // epilogue loop. In the exit block, ComputeReductionResult will generate
+ // checks to verify if the reduction result is the sentinel value. If the
+ // result is the sentinel value, it will be corrected back to the start
+ // value.
+ // TODO: The sentinel value is not always necessary. When the start value is
+ // a constant, and smaller than the start value of the induction variable,
+ // the start value can be directly used to initialize the reduction phi.
+ Iden = StartV;
+ if (!ScalarPHI) {
+ IRBuilderBase::InsertPointGuard IPBuilder(Builder);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
+ }
} else {
Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
RdxDesc.getFastMathFlags());
@@ -3483,7 +3506,7 @@ void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
void VPScalarPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- Value *Start = State.get(getOperand(0), VPLane(0));
+ Value *Start = State.get(getStartValue(), VPLane(0));
PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
Phi->addIncoming(Start, VectorPH);
Phi->setDebugLoc(getDebugLoc());
@@ -3493,7 +3516,7 @@ void VPScalarPHIRecipe::execute(VPTransformState &State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "SCALAR-PHI";
+ O << Indent << "SCALAR-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);