diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.td')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 174 |
1 files changed, 143 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 89f88776d832..f0020a9a3c91 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -989,6 +989,17 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn), [(int_aarch64_neon_fcvtxn node:$Rn), (AArch64fcvtxn_n node:$Rn)]>; +def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPExtendOp>; +def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPExtendOp>; +def AArch64fcvtas_half : SDNode<"AArch64ISD::FCVTAS_HALF", SDTFPExtendOp>; +def AArch64fcvtau_half : SDNode<"AArch64ISD::FCVTAU_HALF", SDTFPExtendOp>; +def AArch64fcvtms_half : SDNode<"AArch64ISD::FCVTMS_HALF", SDTFPExtendOp>; +def AArch64fcvtmu_half : SDNode<"AArch64ISD::FCVTMU_HALF", SDTFPExtendOp>; +def AArch64fcvtns_half : SDNode<"AArch64ISD::FCVTNS_HALF", SDTFPExtendOp>; +def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>; +def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>; +def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>; + //def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>; // Vector immediate ops @@ -2155,7 +2166,7 @@ let Predicates = [HasPAuth] in { i64imm:$Disc, GPR64:$AddrDisc), [], "$AuthVal = $Val">, Sched<[WriteI, ReadI]> { let isCodeGenOnly = 1; - let hasSideEffects = 0; + let hasSideEffects = 1; let mayStore = 0; let mayLoad = 0; let Size = 32; @@ -2660,13 +2671,17 @@ defm ADD : AddSub<0, "add", "sub", add>; defm SUB : AddSub<1, "sub", "add">; def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; + (ADDWri GPR32sponly:$dst, GPR32sp:$src, + (addsub_shifted_imm32 0, 0))>; def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; + (ADDWri GPR32sp:$dst, GPR32sponly:$src, + (addsub_shifted_imm32 0, 0))>; def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; + 
(ADDXri GPR64sponly:$dst, GPR64sp:$src, + (addsub_shifted_imm64 0, 0))>; def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; + (ADDXri GPR64sp:$dst, GPR64sponly:$src, + (addsub_shifted_imm64 0, 0))>; defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; @@ -2726,19 +2741,31 @@ def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } -def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; -def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"neg $dst, $src", + (SUBWrs GPR32:$dst, WZR, + (arith_shifted_reg32 GPR32:$src, 0)), 3>; +def : InstAlias<"neg $dst, $src", + (SUBXrs GPR64:$dst, XZR, + (arith_shifted_reg64 GPR64:$src, 0)), 3>; def : InstAlias<"neg $dst, $src$shift", - (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; + (SUBWrs GPR32:$dst, WZR, + (arith_shifted_reg32 GPR32:$src, arith_shift32:$shift)), 2>; def : InstAlias<"neg $dst, $src$shift", - (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; - -def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; -def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; + (SUBXrs GPR64:$dst, XZR, + (arith_shifted_reg64 GPR64:$src, arith_shift64:$shift)), 2>; + +def : InstAlias<"negs $dst, $src", + (SUBSWrs GPR32:$dst, WZR, + (arith_shifted_reg32 GPR32:$src, 0)), 3>; +def : InstAlias<"negs $dst, $src", + (SUBSXrs GPR64:$dst, XZR, + (arith_shifted_reg64 GPR64:$src, 0)), 3>; def : InstAlias<"negs $dst, $src$shift", - (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; + (SUBSWrs GPR32:$dst, WZR, + (arith_shifted_reg32 GPR32:$src, arith_shift32:$shift)), 2>; def : InstAlias<"negs $dst, $src$shift", - (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + (SUBSXrs 
GPR64:$dst, XZR, + (arith_shifted_reg64 GPR64:$src, arith_shift64:$shift)), 2>; // Unsigned/Signed divide @@ -3165,16 +3192,26 @@ defm ORN : LogicalReg<0b01, 1, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; defm ORR : LogicalReg<0b01, 0, "orr", or>; -def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; -def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; - -def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; -def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; +def : InstAlias<"mov $dst, $src", + (ORRWrs GPR32:$dst, WZR, + (logical_shifted_reg32 GPR32:$src, 0)), 2>; +def : InstAlias<"mov $dst, $src", + (ORRXrs GPR64:$dst, XZR, + (logical_shifted_reg64 GPR64:$src, 0)), 2>; + +def : InstAlias<"mvn $Wd, $Wm", + (ORNWrs GPR32:$Wd, WZR, + (logical_shifted_reg32 GPR32:$Wm, 0)), 3>; +def : InstAlias<"mvn $Xd, $Xm", + (ORNXrs GPR64:$Xd, XZR, + (logical_shifted_reg64 GPR64:$Xm, 0)), 3>; def : InstAlias<"mvn $Wd, $Wm$sh", - (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; + (ORNWrs GPR32:$Wd, WZR, + (logical_shifted_reg32 GPR32:$Wm, logical_shift32:$sh)), 2>; def : InstAlias<"mvn $Xd, $Xm$sh", - (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; + (ORNXrs GPR64:$Xd, XZR, + (logical_shifted_reg64 GPR64:$Xm, logical_shift64:$sh)), 2>; def : InstAlias<"tst $src1, $src2", (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; @@ -3182,14 +3219,18 @@ def : InstAlias<"tst $src1, $src2", (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; def : InstAlias<"tst $src1, $src2", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; + (ANDSWrs WZR, GPR32:$src1, + (logical_shifted_reg32 GPR32:$src2, 0)), 3>; def : InstAlias<"tst $src1, $src2", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; + (ANDSXrs XZR, GPR64:$src1, + (logical_shifted_reg64 GPR64:$src2, 0)), 3>; def : InstAlias<"tst $src1, $src2$sh", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 
2>; + (ANDSWrs WZR, GPR32:$src1, + (logical_shifted_reg32 GPR32:$src2, logical_shift32:$sh)), 2>; def : InstAlias<"tst $src1, $src2$sh", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; + (ANDSXrs XZR, GPR64:$src1, + (logical_shifted_reg64 GPR64:$src2, logical_shift64:$sh)), 2>; def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; @@ -4710,6 +4751,26 @@ let Predicates = [IsLE] in { (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; } +// truncstorei32 of f64 bitcast to i64: store ssub of the FPR with STRSui. +def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), + (STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>; + +// truncstorei16 of f64 bitcast to i64: store hsub of the FPR with STRHui. +def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>; + + // truncstorei16 of f32 bitcast to i32: store hsub of the FPR with STRHui. +def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)), + (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>; + + // truncstorei8 of f64 bitcast to i64: store bsub of the FPR with STRBui. +def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)), + (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>; + + // truncstorei8 of f32 bitcast to i32: store bsub of the FPR with STRBui. 
+def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)), + (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>; + // truncstore i64 def : Pat<(truncstorei32 GPR64:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), @@ -6536,9 +6597,33 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; +// f16 -> i16: fp_to_sint_sat_gi -> FCVTZS, fp_to_uint_sat_gi -> FCVTZU. +let Predicates = [HasFullFP16] in { + def : 
Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>; + def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>; +} + def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), (CMLTv1i64rz V64:$Rn)>; +// f16 -> i16 conversions leave the 16-bit result in hsub of an f32. +class F16ToI16ScalarPat<SDNode cvt_isd, BaseSIMDTwoScalar instr> + : Pat<(f32 (cvt_isd (f16 FPR16:$Rn))), + (f32 (SUBREG_TO_REG (i64 0), (instr FPR16:$Rn), hsub))>; + +let Predicates = [HasFullFP16] in { +def : F16ToI16ScalarPat<AArch64fcvtzs_half, FCVTZSv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtzu_half, FCVTZUv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtas_half, FCVTASv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtau_half, FCVTAUv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtms_half, FCVTMSv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtmu_half, FCVTMUv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtns_half, FCVTNSv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtnu_half, FCVTNUv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtps_half, FCVTPSv1f16>; +def : F16ToI16ScalarPat<AArch64fcvtpu_half, FCVTPUv1f16>; +} + // Round FP64 to BF16. let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))), @@ -6641,20 +6726,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. 
+let HasOneUse = 1 in {
+def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
+def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
+}
 let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
-def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
           (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
           (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
@@ -8234,6 +8323,29 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                             (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
 }
+// add(x, abs(y)) -> saba(x, y, 0); the zero vector comes from MOVIv2d_ns.
+def : Pat<(v8i8 (add V64:$Vn, (abs V64:$Vm))),
+          (SABAv8i8 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i16 (add V64:$Vn, (abs V64:$Vm))),
+          (SABAv4i16 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i32 (add V64:$Vn, (abs V64:$Vm))),
+          (SABAv2i32 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v16i8 (add V128:$Vn, (abs V128:$Vm))),
+          (SABAv16i8 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v8i16 (add V128:$Vn, (abs V128:$Vm))),
+          (SABAv8i16 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v4i32 (add V128:$Vn, (abs V128:$Vm))),
+          (SABAv4i32 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+
+// add(x, zext(abs(y))) -> sabal(x, y, 0); zero is dsub of MOVIv2d_ns.
+def : Pat<(v8i16 (add V128:$Vn, (zext (abs (v8i8 V64:$Vm))))),
+          (SABALv8i8_v8i16 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i32 (add V128:$Vn, (zext (abs (v4i16 V64:$Vm))))),
+          (SABALv4i16_v4i32 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i64 (add V128:$Vn, (zext (abs (v2i32 V64:$Vm))))),
+          (SABALv2i32_v2i64 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+
+
 //----------------------------------------------------------------------------
 // AdvSIMD indexed element
 //---------------------------------------------------------------------------- |
