summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 8
1 file changed, 6 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 3e2b2c351056..03d16fdd54c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -607,13 +607,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
case ISD::FSUB:
if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
NElts = (NElts + 1) / 2;
+ if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
+ NElts = (NElts + 1) / 2;
if (SLT == MVT::f64)
return LT.first * NElts * get64BitInstrCost(CostKind);
if (ST->has16BitInsts() && SLT == MVT::f16)
NElts = (NElts + 1) / 2;
- if (SLT == MVT::f32 || SLT == MVT::f16)
+ if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
return LT.first * NElts * getFullRateInstrCost();
break;
case ISD::FDIV:
@@ -746,7 +748,9 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
- if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
+ if ((ST->hasVOP3PInsts() &&
+ (SLT == MVT::f16 || SLT == MVT::i16 ||
+ (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
NElts = (NElts + 1) / 2;