diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 17 |
5 files changed, 32 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index df0320fd0f17..f97ea40caa67 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9292,6 +9292,7 @@ static bool isRenamedInGFX9(int Opcode) { GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32) // case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64: + case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: case AMDGPU::V_INTERP_P2_F16: case AMDGPU::V_MAD_F16_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b1f93a447a7b..532df39e82a7 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3708,12 +3708,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>; def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>; def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; -def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; -def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; -def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; -def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +} + +let True16Predicate = UseFakeTrue16Insts in { +def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; } let OtherPredicates = [isGFX9Plus] in { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34f90b33bc4b..df50113615ea 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -339,8 +339,7 @@ let FPDPRounding = 1 in { } // End Predicates = [Has16BitInsts, isGFX8Only] let SubtargetPredicate = isGFX9Plus in { - defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", - VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>; + defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>; defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>; } // End SubtargetPredicate = isGFX9Plus } // End FPDPRounding = 1 @@ -643,8 +642,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>; defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; -defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; -defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; +defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; +defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>; @@ -1375,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR let SubtargetPredicate = isGFX11Plus in { defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; - defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; - defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; + defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>; + defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>; defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -1720,7 +1719,7 @@ defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_ defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">; defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">; defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">; -defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">; defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>; defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>; defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>; @@ -1731,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>; defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>; defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>; defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>; -defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>; -defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>; +defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">; +defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">; defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>; @@ -1755,8 +1754,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30 defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">; defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">; defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">; -defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >; -defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >; +defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">; +defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">; defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">; defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">; defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 689f3b38a172..1afd68767cd3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", let OtherPredicates = [HasDot7Insts] in { defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>; defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>; } // End OtherPredicates = [HasDot7Insts] let OtherPredicates = [HasDot1Insts] in { defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>; defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>; } // End OtherPredicates = [HasDot1Insts] def DOT2_BF16_Profile @@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile, multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> { let IsDOT = 1 in - defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, + defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, null_frag, 1>; // Dot-iu instructions consider input as signed if imod neg bits are set. Thus // Dot-iu Intrinsics have extra operands and require separate codegen pattern. diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 0f568ba90a9e..930ed9a5e2d0 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1454,12 +1454,13 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> { bit IsMAI = MAI; } -def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; -def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; -def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; -def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; -def VOP3_MAI : VOP3Features<0, 0, 0, 1>; -def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>; +def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; +def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; +def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; +def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>; +def VOP3_MAI : VOP3Features<0, 0, 0, 1>; +def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>; // Packed is misleading, but it enables the appropriate op_sel // modifiers. @@ -1908,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> { - defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>; - defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>; + defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>; + defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>; } multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName, |
