diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 3e2b2c351056..03d16fdd54c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -607,13 +607,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
   case ISD::FSUB:
     if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
       NElts = (NElts + 1) / 2;
+    if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
+      NElts = (NElts + 1) / 2;
     if (SLT == MVT::f64)
       return LT.first * NElts * get64BitInstrCost(CostKind);
 
     if (ST->has16BitInsts() && SLT == MVT::f16)
       NElts = (NElts + 1) / 2;
 
-    if (SLT == MVT::f32 || SLT == MVT::f16)
+    if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
       return LT.first * NElts * getFullRateInstrCost();
     break;
   case ISD::FDIV:
@@ -746,7 +748,9 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
   MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
 
-  if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
+  if ((ST->hasVOP3PInsts() &&
+       (SLT == MVT::f16 || SLT == MVT::i16 ||
+        (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
       (ST->hasPackedFP32Ops() && SLT == MVT::f32))
     NElts = (NElts + 1) / 2;
 
