diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 294 |
1 files changed, 0 insertions, 294 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 22b921fb2084..5f1983791cfa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -45,12 +45,6 @@ static cl::opt<bool> WidenLoads( cl::ReallyHidden, cl::init(false)); -static cl::opt<bool> Widen16BitOps( - "amdgpu-codegenprepare-widen-16-bit-ops", - cl::desc( - "Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, cl::init(false)); - static cl::opt<bool> BreakLargePHIs("amdgpu-codegenprepare-break-large-phis", cl::desc("Break large PHI nodes for DAGISel"), @@ -150,18 +144,6 @@ public: bool canBreakPHINode(const PHINode &I); - /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to - /// binary operation \p V. - /// - /// \returns Binary operation \p V. - /// \returns \p T's base element bit width. - unsigned getBaseElementBitWidth(const Type *T) const; - - /// \returns Equivalent 32 bit integer type for given type \p T. For example, - /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32> - /// is returned. - Type *getI32Ty(IRBuilder<> &B, const Type *T) const; - /// \returns True if binary operation \p I is a signed binary operation, false /// otherwise. bool isSigned(const BinaryOperator &I) const; @@ -170,10 +152,6 @@ public: /// signed 'icmp' operation, false otherwise. bool isSigned(const SelectInst &I) const; - /// \returns True if type \p T needs to be promoted to 32 bit integer type, - /// false otherwise. - bool needsPromotionToI32(const Type *T) const; - /// Return true if \p T is a legal scalar floating point type. bool isLegalFloatingTy(const Type *T) const; @@ -188,52 +166,6 @@ public: computeKnownFPClass(V, fcSubnormal, CtxI).isKnownNeverSubnormal(); } - /// Promotes uniform binary operation \p I to equivalent 32 bit binary - /// operation. - /// - /// \details \p I's base element bit width must be greater than 1 and less - /// than or equal 16. Promotion is done by sign or zero extending operands to - /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and - /// truncating the result of 32 bit binary operation back to \p I's original - /// type. Division operation is not promoted. - /// - /// \returns True if \p I is promoted to equivalent 32 bit binary operation, - /// false otherwise. - bool promoteUniformOpToI32(BinaryOperator &I) const; - - /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation. - /// - /// \details \p I's base element bit width must be greater than 1 and less - /// than or equal 16. Promotion is done by sign or zero extending operands to - /// 32 bits, and replacing \p I with 32 bit 'icmp' operation. - /// - /// \returns True. - bool promoteUniformOpToI32(ICmpInst &I) const; - - /// Promotes uniform 'select' operation \p I to 32 bit 'select' - /// operation. - /// - /// \details \p I's base element bit width must be greater than 1 and less - /// than or equal 16. Promotion is done by sign or zero extending operands to - /// 32 bits, replacing \p I with 32 bit 'select' operation, and truncating the - /// result of 32 bit 'select' operation back to \p I's original type. - /// - /// \returns True. - bool promoteUniformOpToI32(SelectInst &I) const; - - /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse' - /// intrinsic. - /// - /// \details \p I's base element bit width must be greater than 1 and less - /// than or equal 16. Promotion is done by zero extending the operand to 32 - /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the - /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the - /// shift amount is 32 minus \p I's base element bit width), and truncating - /// the result of the shift operation back to \p I's original type. - /// - /// \returns True. - bool promoteUniformBitreverseToI32(IntrinsicInst &I) const; - /// \returns The minimum number of bits needed to store the value of \Op as an /// unsigned integer. Truncating to this size and then zero-extending to /// the original will not change the value. @@ -320,13 +252,11 @@ public: bool visitInstruction(Instruction &I) { return false; } bool visitBinaryOperator(BinaryOperator &I); bool visitLoadInst(LoadInst &I); - bool visitICmpInst(ICmpInst &I); bool visitSelectInst(SelectInst &I); bool visitPHINode(PHINode &I); bool visitAddrSpaceCastInst(AddrSpaceCastInst &I); bool visitIntrinsicInst(IntrinsicInst &I); - bool visitBitreverseIntrinsicInst(IntrinsicInst &I); bool visitFMinLike(IntrinsicInst &I); bool visitSqrt(IntrinsicInst &I); bool run(); @@ -380,22 +310,6 @@ bool AMDGPUCodeGenPrepareImpl::run() { return MadeChange; } -unsigned AMDGPUCodeGenPrepareImpl::getBaseElementBitWidth(const Type *T) const { - assert(needsPromotionToI32(T) && "T does not need promotion to i32"); - - if (T->isIntegerTy()) - return T->getIntegerBitWidth(); - return cast<VectorType>(T)->getElementType()->getIntegerBitWidth(); -} - -Type *AMDGPUCodeGenPrepareImpl::getI32Ty(IRBuilder<> &B, const Type *T) const { - assert(needsPromotionToI32(T) && "T does not need promotion to i32"); - - if (T->isIntegerTy()) - return B.getInt32Ty(); - return FixedVectorType::get(B.getInt32Ty(), cast<FixedVectorType>(T)); -} - bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const { return I.getOpcode() == Instruction::AShr || I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem; @@ -406,59 +320,11 @@ bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const { cast<ICmpInst>(I.getOperand(0))->isSigned(); } -bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const { - if (!Widen16BitOps) - return false; - - const IntegerType *IntTy = dyn_cast<IntegerType>(T); - if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16) - return true; - - if (const VectorType *VT = dyn_cast<VectorType>(T)) { - // TODO: The set of packed operations is more limited, so may want to - // promote some anyway. - if (ST.hasVOP3PInsts()) - return false; - - return needsPromotionToI32(VT->getElementType()); - } - - return false; -} - bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const { return Ty->isFloatTy() || Ty->isDoubleTy() || (Ty->isHalfTy() && ST.has16BitInsts()); } -// Return true if the op promoted to i32 should have nsw set. -static bool promotedOpIsNSW(const Instruction &I) { - switch (I.getOpcode()) { - case Instruction::Shl: - case Instruction::Add: - case Instruction::Sub: - return true; - case Instruction::Mul: - return I.hasNoUnsignedWrap(); - default: - return false; - } -} - -// Return true if the op promoted to i32 should have nuw set. -static bool promotedOpIsNUW(const Instruction &I) { - switch (I.getOpcode()) { - case Instruction::Shl: - case Instruction::Add: - case Instruction::Mul: - return true; - case Instruction::Sub: - return I.hasNoUnsignedWrap(); - default: - return false; - } -} - bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const { Type *Ty = I.getType(); int TySize = DL.getTypeSizeInBits(Ty); @@ -467,134 +333,6 @@ bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const { return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I); } -bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const { - assert(needsPromotionToI32(I.getType()) && - "I does not need promotion to i32"); - - if (I.getOpcode() == Instruction::SDiv || - I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SRem || - I.getOpcode() == Instruction::URem) - return false; - - IRBuilder<> Builder(&I); - Builder.SetCurrentDebugLocation(I.getDebugLoc()); - - Type *I32Ty = getI32Ty(Builder, I.getType()); - Value *ExtOp0 = nullptr; - Value *ExtOp1 = nullptr; - Value *ExtRes = nullptr; - Value *TruncRes = nullptr; - - if (isSigned(I)) { - ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty); - ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty); - } else { - ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty); - ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty); - } - - ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1); - if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) { - if (promotedOpIsNSW(cast<Instruction>(I))) - Inst->setHasNoSignedWrap(); - - if (promotedOpIsNUW(cast<Instruction>(I))) - Inst->setHasNoUnsignedWrap(); - - if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) - Inst->setIsExact(ExactOp->isExact()); - } - - TruncRes = Builder.CreateTrunc(ExtRes, I.getType()); - - I.replaceAllUsesWith(TruncRes); - I.eraseFromParent(); - - return true; -} - -bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(ICmpInst &I) const { - assert(needsPromotionToI32(I.getOperand(0)->getType()) && - "I does not need promotion to i32"); - - IRBuilder<> Builder(&I); - Builder.SetCurrentDebugLocation(I.getDebugLoc()); - - Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType()); - Value *ExtOp0 = nullptr; - Value *ExtOp1 = nullptr; - Value *NewICmp = nullptr; - - if (I.isSigned()) { - ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty); - ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty); - } else { - ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty); - ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty); - } - NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1); - - I.replaceAllUsesWith(NewICmp); - I.eraseFromParent(); - - return true; -} - -bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(SelectInst &I) const { - assert(needsPromotionToI32(I.getType()) && - "I does not need promotion to i32"); - - IRBuilder<> Builder(&I); - Builder.SetCurrentDebugLocation(I.getDebugLoc()); - - Type *I32Ty = getI32Ty(Builder, I.getType()); - Value *ExtOp1 = nullptr; - Value *ExtOp2 = nullptr; - Value *ExtRes = nullptr; - Value *TruncRes = nullptr; - - if (isSigned(I)) { - ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty); - ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty); - } else { - ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty); - ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty); - } - ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2); - TruncRes = Builder.CreateTrunc(ExtRes, I.getType()); - - I.replaceAllUsesWith(TruncRes); - I.eraseFromParent(); - - return true; -} - -bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32( - IntrinsicInst &I) const { - assert(I.getIntrinsicID() == Intrinsic::bitreverse && - "I must be bitreverse intrinsic"); - assert(needsPromotionToI32(I.getType()) && - "I does not need promotion to i32"); - - IRBuilder<> Builder(&I); - Builder.SetCurrentDebugLocation(I.getDebugLoc()); - - Type *I32Ty = getI32Ty(Builder, I.getType()); - Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty); - Value *ExtRes = - Builder.CreateIntrinsic(Intrinsic::bitreverse, {I32Ty}, {ExtOp}); - Value *LShrOp = - Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType())); - Value *TruncRes = - Builder.CreateTrunc(LShrOp, I.getType()); - - I.replaceAllUsesWith(TruncRes); - I.eraseFromParent(); - - return true; -} - unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const { return computeKnownBits(Op, DL, AC).countMaxActiveBits(); } @@ -1635,10 +1373,6 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { if (foldBinOpIntoSelect(I)) return true; - if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) && - UA.isUniform(&I) && promoteUniformOpToI32(I)) - return true; - if (UseMul24Intrin && replaceMulWithMul24(I)) return true; if (tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(), @@ -1770,16 +1504,6 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) { return false; } -bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) { - bool Changed = false; - - if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) && - UA.isUniform(&I)) - Changed |= promoteUniformOpToI32(I); - - return Changed; -} - bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { Value *Cond = I.getCondition(); Value *TrueVal = I.getTrueValue(); @@ -1787,12 +1511,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { Value *CmpVal; CmpPredicate Pred; - if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) { - if (UA.isUniform(&I)) - return promoteUniformOpToI32(I); - return false; - } - // Match fract pattern with nan check. if (!match(Cond, m_FCmp(Pred, m_Value(CmpVal), m_NonNaN()))) return false; @@ -2196,8 +1914,6 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { - case Intrinsic::bitreverse: - return visitBitreverseIntrinsicInst(I); case Intrinsic::minnum: case Intrinsic::minimumnum: case Intrinsic::minimum: @@ -2209,16 +1925,6 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { } } -bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { - bool Changed = false; - - if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) && - UA.isUniform(&I)) - Changed |= promoteUniformBitreverseToI32(I); - - return Changed; -} - /// Match non-nan fract pattern. /// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) /// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) |
