summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Guy David <guyda96@gmail.com> 2025-07-30 21:13:58 +0300
committer: Guy David <guyda96@gmail.com> 2025-07-30 21:19:54 +0300
commit: 0c0505d5523c4cd46f266cb12975af10655b2f8f (patch)
tree: bcf2f2392d3641dd2640ec515cdb16f46c301513
parent: a194d516546061078dc217a81655688f1a175ca2 (diff)
[AArch64] Fix post-inc stores of floating-point conversions (branch: users/guy-david/aarch64-n2i-keep-in-simd-fix)
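The change in https://github.com/llvm/llvm-project/pull/147707 introduced a bug: patterns were missing for post-increment stores whose input is a vector_extract with i64 types. Add the missing i64 variants of the post-increment truncating-store patterns for i8 and i16. Additionally, remove the pre-legalization early-exit, as it can cause the combine to miss its opportunity to apply the optimization; truncating stores are now handled by casting the converted vector (NVCAST) to a vector of the store's memory type before extracting the element.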
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4
-rw-r--r-- llvm/test/CodeGen/AArch64/store-float-conversion.ll | 128
-rw-r--r-- llvm/test/CodeGen/AArch64/tbl-loops.ll | 3
4 files changed, 140 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4fef93cc5aec..836a5819beba 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24135,9 +24135,6 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
- // Limit to post-legalization in order to avoid peeling truncating stores.
- if (DCI.isBeforeLegalize())
- return SDValue();
if (!Subtarget->isNeonAvailable())
return SDValue();
// Source operand is already a vector.
@@ -24174,6 +24171,13 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
SDValue VecFP = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, FPSrc);
SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP);
+ if (ST->isTruncatingStore()) {
+ EVT NewVecDstVT = EVT::getVectorVT(
+ *DAG.getContext(), ST->getMemoryVT(),
+ VecDstVT.getFixedSizeInBits() / ST->getMemoryVT().getFixedSizeInBits());
+ VecConv = DAG.getNode(AArch64ISD::NVCAST, DL, NewVecDstVT, VecConv);
+ }
+
SDValue Zero = DAG.getVectorIdxConstant(0, DL);
SDValue Extracted =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecConv, Zero);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 251fd44b6ea3..a62de87b072e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9273,8 +9273,12 @@ multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
1>;
+defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i64, ST1i8_POST,
+ 1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
2>;
+defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i64, ST1i16_POST,
+ 2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
diff --git a/llvm/test/CodeGen/AArch64/store-float-conversion.ll b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
index c46801fc1671..1d4073f673ed 100644
--- a/llvm/test/CodeGen/AArch64/store-float-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
@@ -27,6 +27,20 @@ entry:
ret void
}
+define ptr @f32_to_s8_inc(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s8_inc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %trunc = trunc i32 %conv to i8
+ %next = getelementptr i8, ptr %dst, i64 1
+ store i8 %trunc, ptr %dst
+ ret ptr %next
+}
+
define void @f32_to_u16(float %f, ptr %dst) {
; CHECK-LABEL: f32_to_u16:
; CHECK: // %bb.0: // %entry
@@ -53,6 +67,20 @@ entry:
ret void
}
+define ptr @f32_to_s16_inc(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s16_inc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %trunc = trunc i32 %conv to i16
+ %next = getelementptr i16, ptr %dst, i64 1
+ store i16 %trunc, ptr %dst
+ ret ptr %next
+}
+
define void @f32_to_u32(float %f, ptr %dst) {
; CHECK-LABEL: f32_to_u32:
; CHECK: // %bb.0: // %entry
@@ -77,6 +105,19 @@ entry:
ret void
}
+define ptr @f32_to_s32_inc(float %f, ptr %dst) {
+; CHECK-LABEL: f32_to_s32_inc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: st1 { v0.s }[0], [x0], #4
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %f to i32
+ %next = getelementptr i32, ptr %dst, i64 1
+ store i32 %conv, ptr %dst
+ ret ptr %next
+}
+
define void @f32_to_s64(float %f, ptr %dst) {
; CHECK-LABEL: f32_to_s64:
; CHECK: // %bb.0: // %entry
@@ -115,6 +156,93 @@ entry:
ret void
}
+define ptr @f64_to_s64_inc(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s64_inc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: st1 { v0.d }[0], [x0], #8
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %d to i64
+ %next = getelementptr i64, ptr %dst, i64 1
+ store i64 %conv, ptr %dst
+ ret ptr %next
+}
+
+define void @f64_to_u8(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+ %conv = fptoui double %d to i64
+ %trunc = trunc i64 %conv to i8
+ store i8 %trunc, ptr %dst
+ ret void
+}
+
+define void @f64_to_s8(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+ %conv = fptosi double %d to i64
+ %trunc = trunc i64 %conv to i8
+ store i8 %trunc, ptr %dst
+ ret void
+}
+
+define ptr @f64_to_s8_inc(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s8_inc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
+; CHECK-NEXT: ret
+ %conv = fptosi double %d to i64
+ %trunc = trunc i64 %conv to i8
+ store i8 %trunc, ptr %dst
+ %next = getelementptr i8, ptr %dst, i64 1
+ ret ptr %next
+}
+
+define void @f64_to_u16(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+ %conv = fptoui double %d to i64
+ %trunc = trunc i64 %conv to i16
+ store i16 %trunc, ptr %dst
+ ret void
+}
+
+define void @f64_to_s16(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+ %conv = fptosi double %d to i64
+ %trunc = trunc i64 %conv to i16
+ store i16 %trunc, ptr %dst
+ ret void
+}
+
+define ptr @f64_to_s16_inc(double %d, ptr %dst) {
+; CHECK-LABEL: f64_to_s16_inc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
+; CHECK-NEXT: ret
+ %conv = fptosi double %d to i64
+ %trunc = trunc i64 %conv to i16
+ %next = getelementptr i16, ptr %dst, i64 1
+ store i16 %trunc, ptr %dst
+ ret ptr %next
+}
+
define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) {
; CHECK-LABEL: f32_to_i32_multiple_uses:
; CHECK: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 5fc996ad921f..223698ba225a 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -64,8 +64,7 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcsel s2, s0, s3, mi
; CHECK-NEXT: subs w10, w10, #1
; CHECK-NEXT: fcvtzs s2, s2
-; CHECK-NEXT: fmov w11, s2
-; CHECK-NEXT: strb w11, [x9], #1
+; CHECK-NEXT: st1 { v2.b }[0], [x9], #1
; CHECK-NEXT: b.ne .LBB0_7
; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup
; CHECK-NEXT: ret