diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 47 |
1 files changed, 29 insertions, 18 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 9f8926432d00..e8674c4c7759 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -640,27 +640,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     break;
   }
   case Intrinsic::amdgcn_cvt_pkrtz: {
-    Value *Src0 = II.getArgOperand(0);
-    Value *Src1 = II.getArgOperand(1);
-    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
-      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
-        const fltSemantics &HalfSem =
-            II.getType()->getScalarType()->getFltSemantics();
+    auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+      Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+      if (isa<PoisonValue>(Arg))
+        return PoisonValue::get(HalfTy);
+      if (isa<UndefValue>(Arg))
+        return UndefValue::get(HalfTy);
+
+      ConstantFP *CFP = nullptr;
+      if (match(Arg, m_ConstantFP(CFP))) {
         bool LosesInfo;
-        APFloat Val0 = C0->getValueAPF();
-        APFloat Val1 = C1->getValueAPF();
-        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
-        Constant *Folded =
-            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
-                                 ConstantFP::get(II.getContext(), Val1)});
-        return IC.replaceInstUsesWith(II, Folded);
+        APFloat Val(CFP->getValueAPF());
+        Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+        return ConstantFP::get(HalfTy, Val);
       }
-    }
 
-    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
-      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+      Value *Src = nullptr;
+      if (match(Arg, m_FPExt(m_Value(Src)))) {
+        if (Src->getType()->isHalfTy())
+          return Src;
+      }
+
+      return nullptr;
+    };
+
+    if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+      if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+        Value *V = PoisonValue::get(II.getType());
+        V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+        V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+        return IC.replaceInstUsesWith(II, V);
+      }
     }
 
     break;
|
