summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuy David <guyda96@gmail.com>2025-07-17 21:20:14 +0300
committerGuy David <guyda96@gmail.com>2025-07-17 21:20:14 +0300
commit147d2c683e104d914b28592c7239f77d44a10e6a (patch)
treecbf49d39feba27d85c2539afea2305c2832b11c6
parentc4d4e761ef27d6dd27323cf3efa506db5e9e3457 (diff)
[AArch64] Keep floating-point conversion in SIMDusers/guy-david/aarch64-n2i-keep-in-simd-v2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp50
-rw-r--r--llvm/test/CodeGen/AArch64/store-float-conversion.ll117
2 files changed, 167 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f13a14d2464..3cbba25f7729 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24016,6 +24016,54 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
Store->getMemOperand());
}
+// Combine store (fp_to_int X) with optional extensions/trunctions to use vector
+// semantics when NEON is available.
+static void combineFPToIntStore(StoreSDNode *ST,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->isNeonAvailable())
+ return;
+
+ SDValue Value = ST->getValue();
+
+ // Peel
+ for (;;) {
+ if (!Value->hasOneUse())
+ break;
+ if (!ISD::isExtOpcode(Value.getOpcode()) &&
+ Value.getOpcode() != ISD::TRUNCATE && !Value->isAssert())
+ break;
+ Value = Value.getOperand(0);
+ }
+
+ if (Value.getOpcode() != ISD::FP_TO_UINT &&
+ Value.getOpcode() != ISD::FP_TO_SINT)
+ return;
+ if (!Value.hasOneUse())
+ return;
+
+ SDValue FPSrc = Value.getOperand(0);
+ EVT SrcVT = FPSrc.getValueType();
+ if (SrcVT.isVector())
+ return;
+
+ EVT VecSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, 1);
+ EVT DstVT = MVT::getIntegerVT(SrcVT.getScalarSizeInBits());
+ EVT VecDstVT = EVT::getVectorVT(*DAG.getContext(), DstVT, 1);
+ SDLoc DL(ST);
+ SDValue UndefVec = DAG.getUNDEF(VecSrcVT);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue VecFP =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecSrcVT, UndefVec, FPSrc, Zero);
+
+ SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP);
+ SDValue Extracted =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DstVT, VecConv, Zero);
+
+ DCI.CombineTo(Value.getNode(), Extracted);
+}
+
bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
(SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
@@ -24098,6 +24146,8 @@ static SDValue performSTORECombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(ST);
+ combineFPToIntStore(ST, DCI, DAG, Subtarget);
+
auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
EVT EltVT = VT.getVectorElementType();
return EltVT == MVT::f32 || EltVT == MVT::f64;
diff --git a/llvm/test/CodeGen/AArch64/store-float-conversion.ll b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
new file mode 100644
index 000000000000..ca12fcb1dcc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 < %s | FileCheck %s
+
+define void @f32_to_u8(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %f to i32
+ %trunc = trunc i32 %conv to i8
+ store i8 %trunc, ptr %dst
+ ret void
+}
+
+define void @f32_to_s8(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %trunc = trunc i32 %conv to i8
+ store i8 %trunc, ptr %dst
+ ret void
+}
+
+define void @f32_to_u16(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %f to i32
+ %trunc = trunc i32 %conv to i16
+ store i16 %trunc, ptr %dst
+ ret void
+}
+
+define void @f32_to_s16(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %trunc = trunc i32 %conv to i16
+ store i16 %trunc, ptr %dst
+ ret void
+}
+
+define void @f32_to_u32(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %f to i32
+ store i32 %conv, ptr %dst
+ ret void
+}
+
+define void @f32_to_s32(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ store i32 %conv, ptr %dst
+ ret void
+}
+
+define void @f64_to_u64(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %d to i64
+ store i64 %conv, ptr %dst
+ ret void
+}
+
+define void @f64_to_s64(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %d to i64
+ store i64 %conv, ptr %dst
+ ret void
+}
+
+define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_i32_multiple_uses:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: strb w8, [x9]
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %trunc = trunc i32 %conv to i8
+ store i8 %trunc, ptr %dst
+ ret i32 %conv
+}