diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 75e20c793016..e844904ebd1e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -1195,18 +1195,37 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num, Value *Den, unsigned AtLeast, bool IsSigned) const { - unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); - if (LHSSignBits < AtLeast) - return -1; + assert(Num->getType()->getScalarSizeInBits() == + Den->getType()->getScalarSizeInBits()); + unsigned SSBits = Num->getType()->getScalarSizeInBits(); + if (IsSigned) { + unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I); + if (RHSSignBits < AtLeast) + return -1; + + unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); + if (LHSSignBits < AtLeast) + return -1; + + unsigned SignBits = std::min(LHSSignBits, RHSSignBits); + unsigned DivBits = SSBits - SignBits + 1; + return DivBits; // a SignBit needs to be reserved for shrinking + } + + // All bits are used for unsigned division for Num or Den in range + // (SignedMax, UnsignedMax]. + KnownBits Known = computeKnownBits(Den, DL, 0, AC, &I); + if (Known.isNegative() || !Known.isNonNegative()) + return SSBits; + unsigned RHSSignBits = Known.countMinLeadingZeros(); - unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I); - if (RHSSignBits < AtLeast) - return -1; + Known = computeKnownBits(Num, DL, 0, AC, &I); + if (Known.isNegative() || !Known.isNonNegative()) + return SSBits; + unsigned LHSSignBits = Known.countMinLeadingZeros(); unsigned SignBits = std::min(LHSSignBits, RHSSignBits); - unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits; - if (IsSigned) - ++DivBits; + unsigned DivBits = SSBits - SignBits; return DivBits; } @@ -1220,7 +1239,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder, // If Num bits <= 24, assume 0 signbits. unsigned AtLeast = (SSBits <= 24) ? 0 : (SSBits - 24 + IsSigned); int DivBits = getDivNumBits(I, Num, Den, AtLeast, IsSigned); - if (DivBits == -1) + if (DivBits == -1 || DivBits > 24) return nullptr; return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned); } @@ -1688,7 +1707,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { Value *TrueVal = I.getTrueValue(); Value *FalseVal = I.getFalseValue(); Value *CmpVal; - FCmpInst::Predicate Pred; + CmpPredicate Pred; if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) { if (UA.isUniform(&I)) |
