summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64InstrInfo.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.td')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td174
1 files changed, 143 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 89f88776d832..f0020a9a3c91 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -989,6 +989,17 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
[(int_aarch64_neon_fcvtxn node:$Rn),
(AArch64fcvtxn_n node:$Rn)]>;
+def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPExtendOp>;
+def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPExtendOp>;
+def AArch64fcvtas_half : SDNode<"AArch64ISD::FCVTAS_HALF", SDTFPExtendOp>;
+def AArch64fcvtau_half : SDNode<"AArch64ISD::FCVTAU_HALF", SDTFPExtendOp>;
+def AArch64fcvtms_half : SDNode<"AArch64ISD::FCVTMS_HALF", SDTFPExtendOp>;
+def AArch64fcvtmu_half : SDNode<"AArch64ISD::FCVTMU_HALF", SDTFPExtendOp>;
+def AArch64fcvtns_half : SDNode<"AArch64ISD::FCVTNS_HALF", SDTFPExtendOp>;
+def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>;
+def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>;
+def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>;
+
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
// Vector immediate ops
@@ -2155,7 +2166,7 @@ let Predicates = [HasPAuth] in {
i64imm:$Disc, GPR64:$AddrDisc),
[], "$AuthVal = $Val">, Sched<[WriteI, ReadI]> {
let isCodeGenOnly = 1;
- let hasSideEffects = 0;
+ let hasSideEffects = 1;
let mayStore = 0;
let mayLoad = 0;
let Size = 32;
@@ -2660,13 +2671,17 @@ defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;
def : InstAlias<"mov $dst, $src",
- (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
+ (ADDWri GPR32sponly:$dst, GPR32sp:$src,
+ (addsub_shifted_imm32 0, 0))>;
def : InstAlias<"mov $dst, $src",
- (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
+ (ADDWri GPR32sp:$dst, GPR32sponly:$src,
+ (addsub_shifted_imm32 0, 0))>;
def : InstAlias<"mov $dst, $src",
- (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
+ (ADDXri GPR64sponly:$dst, GPR64sp:$src,
+ (addsub_shifted_imm64 0, 0))>;
def : InstAlias<"mov $dst, $src",
- (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
+ (ADDXri GPR64sp:$dst, GPR64sponly:$src,
+ (addsub_shifted_imm64 0, 0))>;
defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
@@ -2726,19 +2741,31 @@ def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
-def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
-def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+def : InstAlias<"neg $dst, $src",
+ (SUBWrs GPR32:$dst, WZR,
+ (arith_shifted_reg32 GPR32:$src, 0)), 3>;
+def : InstAlias<"neg $dst, $src",
+ (SUBXrs GPR64:$dst, XZR,
+ (arith_shifted_reg64 GPR64:$src, 0)), 3>;
def : InstAlias<"neg $dst, $src$shift",
- (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+ (SUBWrs GPR32:$dst, WZR,
+ (arith_shifted_reg32 GPR32:$src, arith_shift32:$shift)), 2>;
def : InstAlias<"neg $dst, $src$shift",
- (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
-
-def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
-def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+ (SUBXrs GPR64:$dst, XZR,
+ (arith_shifted_reg64 GPR64:$src, arith_shift64:$shift)), 2>;
+
+def : InstAlias<"negs $dst, $src",
+ (SUBSWrs GPR32:$dst, WZR,
+ (arith_shifted_reg32 GPR32:$src, 0)), 3>;
+def : InstAlias<"negs $dst, $src",
+ (SUBSXrs GPR64:$dst, XZR,
+ (arith_shifted_reg64 GPR64:$src, 0)), 3>;
def : InstAlias<"negs $dst, $src$shift",
- (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+ (SUBSWrs GPR32:$dst, WZR,
+ (arith_shifted_reg32 GPR32:$src, arith_shift32:$shift)), 2>;
def : InstAlias<"negs $dst, $src$shift",
- (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
+ (SUBSXrs GPR64:$dst, XZR,
+ (arith_shifted_reg64 GPR64:$src, arith_shift64:$shift)), 2>;
// Unsigned/Signed divide
@@ -3165,16 +3192,26 @@ defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR : LogicalReg<0b01, 0, "orr", or>;
-def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
-def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
-
-def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
-def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
+def : InstAlias<"mov $dst, $src",
+ (ORRWrs GPR32:$dst, WZR,
+ (logical_shifted_reg32 GPR32:$src, 0)), 2>;
+def : InstAlias<"mov $dst, $src",
+ (ORRXrs GPR64:$dst, XZR,
+ (logical_shifted_reg64 GPR64:$src, 0)), 2>;
+
+def : InstAlias<"mvn $Wd, $Wm",
+ (ORNWrs GPR32:$Wd, WZR,
+ (logical_shifted_reg32 GPR32:$Wm, 0)), 3>;
+def : InstAlias<"mvn $Xd, $Xm",
+ (ORNXrs GPR64:$Xd, XZR,
+ (logical_shifted_reg64 GPR64:$Xm, 0)), 3>;
def : InstAlias<"mvn $Wd, $Wm$sh",
- (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
+ (ORNWrs GPR32:$Wd, WZR,
+ (logical_shifted_reg32 GPR32:$Wm, logical_shift32:$sh)), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
- (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
+ (ORNXrs GPR64:$Xd, XZR,
+ (logical_shifted_reg64 GPR64:$Xm, logical_shift64:$sh)), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
@@ -3182,14 +3219,18 @@ def : InstAlias<"tst $src1, $src2",
(ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
- (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
+ (ANDSWrs WZR, GPR32:$src1,
+ (logical_shifted_reg32 GPR32:$src2, 0)), 3>;
def : InstAlias<"tst $src1, $src2",
- (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
+ (ANDSXrs XZR, GPR64:$src1,
+ (logical_shifted_reg64 GPR64:$src2, 0)), 3>;
def : InstAlias<"tst $src1, $src2$sh",
- (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
+ (ANDSWrs WZR, GPR32:$src1,
+ (logical_shifted_reg32 GPR32:$src2, logical_shift32:$sh)), 2>;
def : InstAlias<"tst $src1, $src2$sh",
- (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
+ (ANDSXrs XZR, GPR64:$src1,
+ (logical_shifted_reg64 GPR64:$src2, logical_shift64:$sh)), 2>;
def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
@@ -4710,6 +4751,26 @@ let Predicates = [IsLE] in {
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
+// truncstorei32 of an f64 bitcast to i64: store the low 32 bits (ssub) directly from the FP register
+def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+ (STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>;
+
+// truncstorei16 of an f64 bitcast to i64: store the low 16 bits (hsub) directly from the FP register
+def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
+
+// truncstorei16 of an f32 bitcast to i32: store the low 16 bits (hsub) directly from the FP register
+def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
+ (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
+
+// truncstorei8 of an f64 bitcast to i64: store the low 8 bits (bsub) directly from the FP register
+def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+ (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+
+// truncstorei8 of an f32 bitcast to i32: store the low 8 bits (bsub) directly from the FP register
+def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+ (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+
// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
@@ -6536,9 +6597,33 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+// Saturating f16 -> i16 conversions (signed and unsigned)
+let Predicates = [HasFullFP16] in {
+ def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
+ def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>;
+}
+
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;
+// f16 -> i16 conversions leave the bit pattern in an f32
+class F16ToI16ScalarPat<SDNode cvt_isd, BaseSIMDTwoScalar instr>
+ : Pat<(f32 (cvt_isd (f16 FPR16:$Rn))),
+ (f32 (SUBREG_TO_REG (i64 0), (instr FPR16:$Rn), hsub))>;
+
+let Predicates = [HasFullFP16] in {
+def : F16ToI16ScalarPat<AArch64fcvtzs_half, FCVTZSv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtzu_half, FCVTZUv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtas_half, FCVTASv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtau_half, FCVTAUv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtms_half, FCVTMSv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtmu_half, FCVTMUv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtns_half, FCVTNSv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtnu_half, FCVTNUv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtps_half, FCVTPSv1f16>;
+def : F16ToI16ScalarPat<AArch64fcvtpu_half, FCVTPUv1f16>;
+}
+
// Round FP64 to BF16.
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
@@ -6641,20 +6726,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
+let HasOneUse = 1 in {
+def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
+def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
+}
let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
-def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
(SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
(UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
@@ -8234,6 +8323,29 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
(AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}
+// SABA patterns for add(x, abs(y)) -> saba(x, y, 0)
+def : Pat<(v8i8 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv8i8 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i16 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv4i16 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i32 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv2i32 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v16i8 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv16i8 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v8i16 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv8i16 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v4i32 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv4i32 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+
+// SABAL patterns for add(x, zext(abs(y))) -> sabal(x, y, 0)
+def : Pat<(v8i16 (add V128:$Vn, (zext (abs (v8i8 V64:$Vm))))),
+ (SABALv8i8_v8i16 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i32 (add V128:$Vn, (zext (abs (v4i16 V64:$Vm))))),
+ (SABALv4i16_v4i32 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i64 (add V128:$Vn, (zext (abs (v2i32 V64:$Vm))))),
+ (SABALv2i32_v2i64 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+
+
//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------