diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 42 |
1 files changed, 18 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index c048371b11d7..5c9b616e9bc2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -367,6 +367,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand); setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); + setTruncStoreAction(MVT::v5i32, MVT::v5i1, Expand); + setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand); + setTruncStoreAction(MVT::v5i32, MVT::v5i16, Expand); + + setTruncStoreAction(MVT::v6i32, MVT::v6i1, Expand); + setTruncStoreAction(MVT::v6i32, MVT::v6i8, Expand); + setTruncStoreAction(MVT::v6i32, MVT::v6i16, Expand); + + setTruncStoreAction(MVT::v7i32, MVT::v7i1, Expand); + setTruncStoreAction(MVT::v7i32, MVT::v7i8, Expand); + setTruncStoreAction(MVT::v7i32, MVT::v7i16, Expand); + setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); @@ -411,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64}, Expand); - setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); + setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand); if (Subtarget->has16BitInsts()) { setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal); @@ -1427,8 +1439,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); - case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); - case ISD::FREM: return LowerFREM(Op, DAG); + case ISD::SDIVREM: + return LowerSDIVREM(Op, DAG); case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); @@ -2423,21 +2435,6 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, return DAG.getMergeValues(Res, DL); } -// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x) -SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { - SDLoc SL(Op); - EVT VT = Op.getValueType(); - auto Flags = Op->getFlags(); - SDValue X = Op.getOperand(0); - SDValue Y = Op.getOperand(1); - - SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags); - SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags); - SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags); - // TODO: For f32 use FMAD instead if !hasFastFMA32? - return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags); -} - SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); @@ -2650,10 +2647,7 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) { bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags) { - if (Flags.hasApproximateFuncs()) - return true; - auto &Options = DAG.getTarget().Options; - return Options.ApproxFuncFPMath; + return Flags.hasApproximateFuncs(); } bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG, @@ -2775,8 +2769,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, assert(IsLog10 || Op.getOpcode() == ISD::FLOG); const auto &Options = getTargetMachine().Options; - if (VT == MVT::f16 || Flags.hasApproximateFuncs() || - Options.ApproxFuncFPMath) { + if (VT == MVT::f16 || Flags.hasApproximateFuncs()) { if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { // Log and multiply in f32 is good enough for f16. @@ -5674,6 +5667,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CALL) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TC_RETURN_GFX) + NODE_NAME_CASE(TC_RETURN_GFX_WholeWave) NODE_NAME_CASE(TC_RETURN_CHAIN) NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR) NODE_NAME_CASE(TRAP) |
