diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.h')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.h | 250 |
1 files changed, 183 insertions, 67 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d6bc462a0dfa..53291a931530 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -219,6 +219,9 @@ public: size_t getNumSuccessors() const { return Successors.size(); } size_t getNumPredecessors() const { return Predecessors.size(); } + /// Returns true if this block has any predecessors. + bool hasPredecessors() const { return !Predecessors.empty(); } + /// An Enclosing Block of a block B is any block containing B, including B /// itself. \return the closest enclosing block starting from "this", which /// has successors. \return the root enclosing block if all enclosing blocks @@ -400,7 +403,7 @@ class LLVM_ABI_FOR_TEST VPRecipeBase public: VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPDef(SC), VPUser(Operands), DL(DL) {} virtual ~VPRecipeBase() = default; @@ -518,11 +521,11 @@ protected: class VPSingleDefRecipe : public VPRecipeBase, public VPValue { public: VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeBase(SC, Operands, DL), VPValue(this) {} VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, - Value *UV, DebugLoc DL = {}) + Value *UV, DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {} static inline bool classof(const VPRecipeBase *R) { @@ -557,6 +560,7 @@ public: case VPRecipeBase::VPPartialReductionSC: return true; case VPRecipeBase::VPBranchOnMaskSC: + case VPRecipeBase::VPInterleaveEVLSC: case VPRecipeBase::VPInterleaveSC: case VPRecipeBase::VPIRInstructionSC: case VPRecipeBase::VPWidenLoadEVLSC: @@ -712,12 +716,15 @@ public: VPIRFlags(GEPNoWrapFlags GEPFlags) : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {} -public: void transferFlags(VPIRFlags &Other) { OpType = 
Other.OpType; AllFlags = Other.AllFlags; } + /// Only keep flags also present in \p Other. \p Other must have the same + /// OpType as the current object. + void intersectFlags(const VPIRFlags &Other); + /// Drop all poison-generating flags. void dropPoisonGeneratingFlags() { // NOTE: This needs to be kept in-sync with @@ -864,7 +871,7 @@ public: /// using IR flags. struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {} VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, @@ -872,7 +879,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {} VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, - const VPIRFlags &Flags, DebugLoc DL = {}) + const VPIRFlags &Flags, + DebugLoc DL = DebugLoc::getUnknown()) : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {} static inline bool classof(const VPRecipeBase *R) { @@ -900,6 +908,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { return R && classof(R); } + static inline bool classof(const VPSingleDefRecipe *U) { + auto *R = dyn_cast<VPRecipeBase>(U); + return R && classof(R); + } + void execute(VPTransformState &State) override = 0; /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx. @@ -975,6 +988,10 @@ public: Not, SLPLoad, SLPStore, + // Creates a mask where each lane is active (true) whilst the current + // counter (first operand + index) is less than the second operand. i.e. + // mask[i] = icmp ult (op0 + i), op1 + // The size of the mask returned is VF * Multiplier (UF, third op). 
ActiveLaneMask, ExplicitVectorLength, CalculateTripCountMinusVF, @@ -1014,7 +1031,8 @@ public: // Returns a scalar boolean value, which is true if any lane of its // (boolean) vector operands is true. It produces the reduced value across // all unrolled iterations. Unrolling will add all copies of its original - // operand as additional operands. + // operand as additional operands. AnyOf is poison-safe as all operands + // will be frozen. AnyOf, // Calculates the first active lane index of the vector predicate operands. // It produces the lane index across all unrolled iterations. Unrolling will @@ -1080,13 +1098,13 @@ private: #endif public: - VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {}, - const Twine &Name = "") + VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, + DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, - const VPIRFlags &Flags, DebugLoc DL = {}, + const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = ""); VP_CLASSOF_IMPL(VPDef::VPInstructionSC) @@ -1479,7 +1497,8 @@ public: } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - const VPIRFlags &Flags = {}, DebugLoc DL = {}) + const VPIRFlags &Flags = {}, + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) { assert(flagsValidForOpcode(Opcode) && @@ -1537,7 +1556,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments, Type *Ty, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), VPIRMetadata(CI), 
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), @@ -1546,7 +1565,7 @@ public: VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments, Type *Ty, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL), VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { LLVMContext &Ctx = Ty->getContext(); @@ -1615,7 +1634,8 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, public: VPWidenCallRecipe(Value *UV, Function *Variant, - ArrayRef<VPValue *> CallArguments, DebugLoc DL = {}) + ArrayRef<VPValue *> CallArguments, + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, *cast<Instruction>(UV)), VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) { @@ -1644,10 +1664,8 @@ public: return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()); } - operand_range args() { return make_range(op_begin(), std::prev(op_end())); } - const_operand_range args() const { - return make_range(op_begin(), std::prev(op_end())); - } + operand_range args() { return drop_end(operands()); } + const_operand_range args() const { return drop_end(operands()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -1667,7 +1685,7 @@ class VPHistogramRecipe : public VPRecipeBase { public: VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {} ~VPHistogramRecipe() override = default; @@ -1998,6 +2016,9 @@ public: return getOperand(1); } + /// Update the incoming value from the loop backedge. + void setBackedgeValue(VPValue *V) { setOperand(1, V); } + /// Returns the backedge value as a recipe. The backedge value is guaranteed /// to be a recipe. 
virtual VPRecipeBase &getBackedgeRecipe() { @@ -2229,8 +2250,8 @@ protected: public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and /// debug location \p DL. - VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, - const Twine &Name = "") + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, + DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL), Name(Name.str()) { if (Start) @@ -2381,9 +2402,8 @@ public: } VPBlendRecipe *clone() override { - SmallVector<VPValue *> Ops(operands()); - return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops, - getDebugLoc()); + return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), + operands(), getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPBlendSC) @@ -2409,6 +2429,12 @@ public: return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized()); } + /// Set mask number \p Idx to \p V. + void setMask(unsigned Idx, VPValue *V) { + assert((Idx > 0 || !isNormalized()) && "First index has no mask!"); + Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V); + } + void execute(VPTransformState &State) override { llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends"); } @@ -2434,12 +2460,13 @@ public: } }; -/// VPInterleaveRecipe is a recipe for transforming an interleave group of load -/// or stores into one wide load/store and shuffles. The first operand of a -/// VPInterleave recipe is the address, followed by the stored values, followed -/// by an optional mask. -class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, - public VPIRMetadata { +/// A common base class for interleaved memory operations. +/// An Interleaved memory operation is a memory access method that combines +/// multiple strided loads/stores into a single wide load/store with shuffles. +/// The first operand is the start address. 
The optional operands are, in order, +/// the stored values and the mask. +class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase, + public VPIRMetadata { const InterleaveGroup<Instruction> *IG; /// Indicates if the interleave group is in a conditional block and requires a @@ -2450,12 +2477,14 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, /// unusued gaps can be loaded speculatively. bool NeedsMaskForGaps = false; -public: - VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, - ArrayRef<VPValue *> StoredValues, VPValue *Mask, - bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) - : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD), - IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { +protected: + VPInterleaveBase(const unsigned char SC, + const InterleaveGroup<Instruction> *IG, + ArrayRef<VPValue *> Operands, + ArrayRef<VPValue *> StoredValues, VPValue *Mask, + bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) + : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG), + NeedsMaskForGaps(NeedsMaskForGaps) { // TODO: extend the masked interleaved-group support to reversed access. assert((!Mask || !IG->isReverse()) && "Reversed masked interleave-group not supported."); @@ -2473,14 +2502,19 @@ public: addOperand(Mask); } } - ~VPInterleaveRecipe() override = default; - VPInterleaveRecipe *clone() override { - return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), - NeedsMaskForGaps, *this, getDebugLoc()); +public: + VPInterleaveBase *clone() override = 0; + + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPInterleaveSC || + R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC; } - VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast<VPRecipeBase>(U); + return R && classof(R); + } /// Return the address accessed by this recipe. 
VPValue *getAddr() const { @@ -2490,48 +2524,130 @@ public: /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { - // Mask is optional and therefore the last, currently 2nd operand. + // Mask is optional and the last operand. return HasMask ? getOperand(getNumOperands() - 1) : nullptr; } + /// Return true if the access needs a mask because of the gaps. + bool needsMaskForGaps() const { return NeedsMaskForGaps; } + + const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; } + + Instruction *getInsertPos() const { return IG->getInsertPos(); } + + void execute(VPTransformState &State) override { + llvm_unreachable("VPInterleaveBase should not be instantiated."); + } + + /// Return the cost of this recipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + + /// Returns true if the recipe only uses the first lane of operand \p Op. + virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; + + /// Returns the number of stored operands of this interleave group. Returns 0 + /// for load interleave groups. + virtual unsigned getNumStoreOperands() const = 0; + /// Return the VPValues stored by this interleave group. If it is a load /// interleave group, return an empty ArrayRef. ArrayRef<VPValue *> getStoredValues() const { - // The first operand is the address, followed by the stored values, followed - // by an optional mask. - return ArrayRef<VPValue *>(op_begin(), getNumOperands()) - .slice(1, getNumStoreOperands()); + return ArrayRef<VPValue *>(op_end() - + (getNumStoreOperands() + (HasMask ? 1 : 0)), + getNumStoreOperands()); + } +}; + +/// VPInterleaveRecipe is a recipe for transforming an interleave group of load +/// or stores into one wide load/store and shuffles. The first operand of a +/// VPInterleave recipe is the address, followed by the stored values, followed +/// by an optional mask. 
+class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase { +public: + VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, + ArrayRef<VPValue *> StoredValues, VPValue *Mask, + bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) + : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask, + NeedsMaskForGaps, MD, DL) {} + + ~VPInterleaveRecipe() override = default; + + VPInterleaveRecipe *clone() override { + return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(), + getStoredValues(), getMask(), + needsMaskForGaps(), *this, getDebugLoc()); } + VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) + /// Generate the wide load or store, and shuffles. void execute(VPTransformState &State) override; - /// Return the cost of this VPInterleaveRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif - const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); + } - /// Returns the number of stored operands of this interleave group. Returns 0 - /// for load interleave groups. - unsigned getNumStoreOperands() const { - return getNumOperands() - (HasMask ? 2 : 1); + unsigned getNumStoreOperands() const override { + return getNumOperands() - (getMask() ? 2 : 1); } +}; + +/// A recipe for interleaved memory operations with vector-predication +/// intrinsics. The first operand is the address, the second operand is the +/// explicit vector length. Stored values and mask are optional operands. 
+class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase { +public: + VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask) + : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(), + ArrayRef<VPValue *>({R.getAddr(), &EVL}), + R.getStoredValues(), Mask, R.needsMaskForGaps(), R, + R.getDebugLoc()) { + assert(!getInterleaveGroup()->isReverse() && + "Reversed interleave-group with tail folding is not supported."); + assert(!needsMaskForGaps() && "Interleaved access with gap mask is not " + "supported for scalable vector."); + } + + ~VPInterleaveEVLRecipe() override = default; + + VPInterleaveEVLRecipe *clone() override { + llvm_unreachable("cloning not implemented yet"); + } + + VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC) + + /// The VPValue of the explicit vector length. + VPValue *getEVL() const { return getOperand(1); } - /// The recipe only uses the first lane of the address. + /// Generate the wide load or store, and shuffles. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// The recipe only uses the first lane of the address, and EVL operand. bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); + return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) || + Op == getEVL(); } - Instruction *getInsertPos() const { return IG->getInsertPos(); } + unsigned getNumStoreOperands() const override { + return getNumOperands() - (getMask() ? 
3 : 2); + } }; /// A recipe to represent inloop reduction operations, performing a reduction on @@ -2561,14 +2677,14 @@ protected: public: VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, - bool IsOrdered, DebugLoc DL = {}) + bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown()) : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I, ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, IsOrdered, DL) {} VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, - bool IsOrdered, DebugLoc DL = {}) + bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown()) : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr, ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, IsOrdered, DL) {} @@ -2686,7 +2802,7 @@ public: class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe { public: VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, - DebugLoc DL = {}) + DebugLoc DL = DebugLoc::getUnknown()) : VPReductionRecipe( VPDef::VPReductionEVLSC, R.getRecurrenceKind(), R.getFastMathFlags(), @@ -3537,7 +3653,8 @@ public: InductionOpcode(Opcode) {} VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, - VPValue *Step, VPValue *VF, DebugLoc DL = {}) + VPValue *Step, VPValue *VF, + DebugLoc DL = DebugLoc::getUnknown()) : VPScalarIVStepsRecipe( IV, Step, VF, IndDesc.getInductionOpcode(), dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()) @@ -4142,7 +4259,7 @@ public: /// Returns an iterator range over all VFs of the plan. iterator_range<SmallSetVector<ElementCount, 2>::iterator> vectorFactors() const { - return {VFs.begin(), VFs.end()}; + return VFs; } bool hasScalarVFOnly() const { @@ -4299,9 +4416,8 @@ public: /// via the other early exit). 
bool hasEarlyExit() const { return count_if(ExitBlocks, - [](VPIRBasicBlock *EB) { - return EB->getNumPredecessors() != 0; - }) > 1 || + [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) > + 1 || (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1); } @@ -4309,7 +4425,7 @@ public: /// that this relies on unneeded branches to the scalar tail loop being /// removed. bool hasScalarTail() const { - return !(getScalarPreheader()->getNumPredecessors() == 0 || + return !(!getScalarPreheader()->hasPredecessors() || getScalarPreheader()->getSinglePredecessor() == getEntry()); } }; |
