summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 42
1 files changed, 18 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c048371b11d7..5c9b616e9bc2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -367,6 +367,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i1, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i16, Expand);
+
+ setTruncStoreAction(MVT::v6i32, MVT::v6i1, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i8, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i16, Expand);
+
+ setTruncStoreAction(MVT::v7i32, MVT::v7i1, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i8, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i16, Expand);
+
setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
@@ -411,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
Expand);
- setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+ setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
@@ -1427,8 +1439,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
- case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
- case ISD::FREM: return LowerFREM(Op, DAG);
+ case ISD::SDIVREM:
+ return LowerSDIVREM(Op, DAG);
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
@@ -2423,21 +2435,6 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
return DAG.getMergeValues(Res, DL);
}
-// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
-SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- EVT VT = Op.getValueType();
- auto Flags = Op->getFlags();
- SDValue X = Op.getOperand(0);
- SDValue Y = Op.getOperand(1);
-
- SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
- SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
- // TODO: For f32 use FMAD instead if !hasFastFMA32?
- return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
-}
-
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -2650,10 +2647,7 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) {
bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
SDNodeFlags Flags) {
- if (Flags.hasApproximateFuncs())
- return true;
- auto &Options = DAG.getTarget().Options;
- return Options.ApproxFuncFPMath;
+ return Flags.hasApproximateFuncs();
}
bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
@@ -2775,8 +2769,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
const auto &Options = getTargetMachine().Options;
- if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
- Options.ApproxFuncFPMath) {
+ if (VT == MVT::f16 || Flags.hasApproximateFuncs()) {
if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
// Log and multiply in f32 is good enough for f16.
@@ -5674,6 +5667,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TC_RETURN_GFX)
+ NODE_NAME_CASE(TC_RETURN_GFX_WholeWave)
NODE_NAME_CASE(TC_RETURN_CHAIN)
NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
NODE_NAME_CASE(TRAP)