summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@linaro.org>2025-10-02 13:52:22 +0000
committerChristophe Lyon <christophe.lyon@linaro.org>2025-11-12 13:25:54 +0000
commita48912e9db50915746d5fee545293e97648210bc (patch)
tree7a6366f24126196076e54fc3877d04eb5d01d5e5
parentf24307422d1d15f99b2f8af19894e573f7036fb1 (diff)
arm: [MVE] Fix carry-in support for vadcq / vsbcq [PR122189]
The vadcq and vsbcq patterns had two problems: - the adc / sbc part of the pattern did not mention the use of vfpcc - the carry calculation part should use a different unspec code In addition, the get_fpscr_nzcvqc and set_fpscr_nzcvqc were over-cautious by using unspec_volatile when unspec is really what they need. Making them unspec makes it possible to remove redundant accesses to FPSCR_nzcvqc. With unspec_volatile, we used to generate: test_2: @ args = 0, pretend = 0, frame = 8 @ frame_needed = 0, uses_anonymous_args = 0 vmov.i32 q0, #0x1 @ v4si push {lr} sub sp, sp, #12 vmrs r3, FPSCR_nzcvqc ;; [1] bic r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q3, q0, q0 vmrs r3, FPSCR_nzcvqc ;; [2] vmrs r3, FPSCR_nzcvqc orr r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q0, q0, q0 vmrs r3, FPSCR_nzcvqc ldr r0, .L8 ubfx r3, r3, #29, #1 str r3, [sp, #4] bl print_uint32x4_t add sp, sp, #12 @ sp needed pop {pc} .L9: .align 2 .L8: .word .LC1 with unspec, we generate: test_2: @ args = 0, pretend = 0, frame = 8 @ frame_needed = 0, uses_anonymous_args = 0 vmrs r3, FPSCR_nzcvqc ;; [1] bic r3, r3, #536870912 ;; [3] vmov.i32 q0, #0x1 @ v4si vmsr FPSCR_nzcvqc, r3 vadc.i32 q3, q0, q0 vmrs r3, FPSCR_nzcvqc orr r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q0, q0, q0 vmrs r3, FPSCR_nzcvqc push {lr} ubfx r3, r3, #29, #1 sub sp, sp, #12 ldr r0, .L8 str r3, [sp, #4] bl print_uint32x4_t add sp, sp, #12 @ sp needed pop {pc} .L9: .align 2 .L8: .word .LC1 That is, unspec in get_fpscr_nzcvqc makes it possible to: - move [1] earlier - delete redundant [2] and unspec in set_fpscr_nzcvqc makes it possible to move push {lr} and stack manipulation later. gcc/ChangeLog: PR target/122189 * config/arm/iterators.md (VxCIQ_carry, VxCIQ_M_carry, VxCQ_carry) (VxCQ_M_carry): New int attributes. * config/arm/mve.md (get_fpscr_nzcvqc, set_fpscr_nzcvqc): Use unspec instead of unspec_volatile. (vadciq, vadciq_m, vadcq, vadcq_m): Use vfpcc in operation. Use a different unspec code for carry calculation. 
* config/arm/unspecs.md (VADCQ_U_carry, VADCQ_M_U_carry) (VADCQ_S_carry, VADCQ_M_S_carry, VSBCIQ_U_carry, VSBCIQ_S_carry, VSBCIQ_M_U_carry, VSBCIQ_M_S_carry, VSBCQ_U_carry, VSBCQ_S_carry, VSBCQ_M_U_carry, VSBCQ_M_S_carry, VADCIQ_U_carry, VADCIQ_M_U_carry, VADCIQ_S_carry, VADCIQ_M_S_carry): New unspec codes. gcc/testsuite/ChangeLog: PR target/122189 * gcc.target/arm/mve/intrinsics/vadcq-check-carry.c: New test. * gcc.target/arm/mve/intrinsics/vadcq_m_s32.c: Adjust instructions order. * gcc.target/arm/mve/intrinsics/vadcq_m_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c: Likewise. (cherry picked from commits 027205879733933ec991c230795da6c01ac50029 and 697ccadd7217316ea91ddd978ddc944e6df09522)
-rw-r--r--gcc/config/arm/iterators.md17
-rw-r--r--gcc/config/arm/mve.md36
-rw-r--r--gcc/config/arm/unspecs.md16
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c48
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c2
8 files changed, 109 insertions, 16 deletions
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 743fe48e6cc..d1126e76720 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -3022,3 +3022,20 @@
;; Define iterators for VCMLA operations as MUL
(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
UNSPEC_VCMUL_CONJ])
+
+(define_int_attr VxCIQ_carry [(VADCIQ_U "VADCIQ_U_carry")
+ (VADCIQ_S "VADCIQ_S_carry")
+ (VSBCIQ_U "VSBCIQ_U_carry")
+ (VSBCIQ_S "VSBCIQ_S_carry")])
+(define_int_attr VxCIQ_M_carry [(VADCIQ_M_U "VADCIQ_M_U_carry")
+ (VADCIQ_M_S "VADCIQ_M_S_carry")
+ (VSBCIQ_M_U "VSBCIQ_M_U_carry")
+ (VSBCIQ_M_S "VSBCIQ_M_S_carry")])
+(define_int_attr VxCQ_carry [(VADCQ_U "VADCQ_U_carry")
+ (VADCQ_S "VADCQ_S_carry")
+ (VSBCQ_U "VSBCQ_U_carry")
+ (VSBCQ_S "VSBCQ_S_carry")])
+(define_int_attr VxCQ_M_carry [(VADCQ_M_U "VADCQ_M_U_carry")
+ (VADCQ_M_S "VADCQ_M_S_carry")
+ (VSBCQ_M_U "VSBCQ_M_U_carry")
+ (VSBCQ_M_S "VSBCQ_M_S_carry")])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index cc266f89cdf..1ec3b2900f9 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3965,14 +3965,14 @@
(define_insn "get_fpscr_nzcvqc"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
+ (unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmrs\\t%0, FPSCR_nzcvqc"
[(set_attr "type" "mve_move")])
(define_insn "set_fpscr_nzcvqc"
[(set (reg:SI VFPCC_REGNUM)
- (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ (unspec:SI [(match_operand:SI 0 "register_operand" "r")]
VUNSPEC_SET_FPSCR_NZCVQC))]
"TARGET_HAVE_MVE"
"vmsr\\tFPSCR_nzcvqc, %0"
@@ -3988,8 +3988,9 @@
(match_operand:V4SI 2 "s_register_operand" "w")]
VxCIQ))
(set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(const_int 0)]
- VxCIQ))
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)]
+ <VxCIQ_carry>))
]
"TARGET_HAVE_MVE"
"<mve_insn>.i32\t%q0, %q1, %q2"
@@ -4009,8 +4010,11 @@
(match_operand:V4BI 4 "vpr_register_operand" "Up")]
VxCIQ_M))
(set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(const_int 0)]
- VxCIQ_M))
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ <VxCIQ_M_carry>))
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
@@ -4025,11 +4029,14 @@
(define_insn "@mve_<mve_insn>q_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
- (match_operand:V4SI 2 "s_register_operand" "w")]
+ (match_operand:V4SI 2 "s_register_operand" "w")
+ (reg:SI VFPCC_REGNUM)]
VxCQ))
(set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(reg:SI VFPCC_REGNUM)]
- VxCQ))
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)
+ (reg:SI VFPCC_REGNUM)]
+ <VxCQ_carry>))
]
"TARGET_HAVE_MVE"
"<mve_insn>.i32\t%q0, %q1, %q2"
@@ -4047,11 +4054,16 @@
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
(match_operand:V4SI 2 "s_register_operand" "w")
(match_operand:V4SI 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
+ (match_operand:V4BI 4 "vpr_register_operand" "Up")
+ (reg:SI VFPCC_REGNUM)]
VxCQ_M))
(set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(reg:SI VFPCC_REGNUM)]
- VxCQ_M))
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (reg:SI VFPCC_REGNUM)]
+ <VxCQ_M_carry>))
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a03609d1de4..ecc6d611529 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1189,21 +1189,37 @@
VLDRGBWBQ
VLDRGBWBQ_Z
VADCQ_U
+ VADCQ_U_carry
VADCQ_M_U
+ VADCQ_M_U_carry
VADCQ_S
+ VADCQ_S_carry
VADCQ_M_S
+ VADCQ_M_S_carry
VSBCIQ_U
+ VSBCIQ_U_carry
VSBCIQ_S
+ VSBCIQ_S_carry
VSBCIQ_M_U
+ VSBCIQ_M_U_carry
VSBCIQ_M_S
+ VSBCIQ_M_S_carry
VSBCQ_U
+ VSBCQ_U_carry
VSBCQ_S
+ VSBCQ_S_carry
VSBCQ_M_U
+ VSBCQ_M_U_carry
VSBCQ_M_S
+ VSBCQ_M_S_carry
VADCIQ_U
+ VADCIQ_U_carry
VADCIQ_M_U
+ VADCIQ_M_U_carry
VADCIQ_S
+ VADCIQ_S_carry
VADCIQ_M_S
+ VADCIQ_M_S_carry
VLD2Q
VLD4Q
VST2Q
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c
new file mode 100644
index 00000000000..3a9b8debf98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <inttypes.h>
+#include <stdio.h>
+
+__attribute((noinline)) void print_uint32x4_t(const char *name, uint32x4_t val)
+{
+ printf("%s: %u, %u, %u, %u\n",
+ name,
+ vgetq_lane_u32(val, 0),
+ vgetq_lane_u32(val, 1),
+ vgetq_lane_u32(val, 2),
+ vgetq_lane_u32(val, 3));
+}
+
+void __attribute__ ((noinline)) test_2(void)
+{
+ uint32x4_t v12, v18, v108;
+ unsigned v17 = 0;
+ v12 = vdupq_n_u32(1);
+ v18 = vadcq_u32(v12, v12, &v17);
+ v17 = 1;
+ v108 = vadcq_u32(v12, v12, &v17);
+ print_uint32x4_t("v108", v108);
+}
+
+int main()
+{
+ test_2();
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-output "v108: 3, 2, 2, 2" } */
+/* { dg-final { scan-assembler-times {\tvmrs\t(?:ip|fp|r[0-9]+), FPSCR_nzcvqc} 3 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
index 0d4cb779254..1802c20a397 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
@@ -1,6 +1,6 @@
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_mve.h"
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
index a0ba6825d8c..64f221df868 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
@@ -1,6 +1,6 @@
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_mve.h"
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
index 7a332610c69..da36d694ddf 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
@@ -1,6 +1,6 @@
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_mve.h"
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
index 60902196502..555690f5fb8 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
@@ -1,6 +1,6 @@
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_mve.h"