summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2025-01-13 11:22:55 +0700
committerMatt Arsenault <arsenm2@gmail.com>2025-11-11 16:47:47 -0800
commitafe4cffcca614618efd300a790c17d322c12f4bc (patch)
tree806463a80fa08eca1b285ef594e2525f937b7da1 /llvm/test/CodeGen/X86/insert-into-constant-vector.ll
parent79d9ae7a777a03452991d222642ffdb6687d9210 (diff)
DAG: Handle load in SimplifyDemandedVectorElts (branch: users/arsenm/dag/simplify-demanded-vector-elts-load)
This improves some AMDGPU cases and avoids future regressions. The combiner likes to form shuffles for cases where an extract_vector_elt would do perfectly well, and this recovers some of the regressions from losing load narrowing. AMDGPU, AArch64 and RISCV test changes look broadly better. Other targets have some improvements, but mostly regressions. In particular X86 looks much worse. I'm guessing this is because its shouldReduceLoadWidth is wrong. I mostly just regenerated the checks. I assume some set of them should switch to use volatile loads to defeat the optimization.
Diffstat (limited to 'llvm/test/CodeGen/X86/insert-into-constant-vector.ll')
-rw-r--r--llvm/test/CodeGen/X86/insert-into-constant-vector.ll52
1 files changed, 26 insertions, 26 deletions
diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
index c44945ac2d92..20aa93bf10ec 100644
--- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
+++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -145,9 +145,9 @@ define <2 x i64> @elt0_v2i64(i64 %x) {
;
; X64-SSE2-LABEL: elt0_v2i64:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movq %rdi, %xmm1
-; X64-SSE2-NEXT: movapd {{.*#+}} xmm0 = [u,1]
-; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-SSE2-NEXT: movq %rdi, %xmm0
+; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE2-NEXT: retq
;
; X64-SSE4-LABEL: elt0_v2i64:
@@ -218,28 +218,26 @@ define <4 x float> @elt1_v4f32(float %x) {
define <2 x double> @elt1_v2f64(double %x) {
; X86-SSE-LABEL: elt1_v2f64:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,u]
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: elt1_v2f64:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,u]
+; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT: movaps %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: elt1_v2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = [4.2E+1,4.2E+1]
-; X86-AVX-NEXT: # xmm0 = mem[0,0]
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: elt1_v2f64:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
-; X64-AVX-NEXT: # xmm1 = mem[0,0]
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT: retq
%ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
@@ -384,7 +382,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X64-SSE2-LABEL: elt5_v8i64:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movq %rdi, %xmm0
-; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,u]
+; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = [4,0]
; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
@@ -457,7 +455,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
define <8 x double> @elt1_v8f64(double %x) {
; X86-SSE-LABEL: elt1_v8f64:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,u]
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; X86-SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
@@ -466,7 +464,7 @@ define <8 x double> @elt1_v8f64(double %x) {
;
; X64-SSE-LABEL: elt1_v8f64:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [4.2E+1,u]
+; X64-SSE-NEXT: movsd {{.*#+}} xmm4 = [4.2E+1,0.0E+0]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
@@ -476,47 +474,49 @@ define <8 x double> @elt1_v8f64(double %x) {
;
; X86-AVX1-LABEL: elt1_v8f64:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
-; X86-AVX1-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
-; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; X86-AVX1-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: elt1_v8f64:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
+; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; X64-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX1-NEXT: retq
;
; X86-AVX2-LABEL: elt1_v8f64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
-; X86-AVX2-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
-; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; X86-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; X86-AVX2-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: elt1_v8f64:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
+; X64-AVX2-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; X64-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX2-NEXT: retq
;
; X86-AVX512F-LABEL: elt1_v8f64:
; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
-; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
-; X86-AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
+; X86-AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X86-AVX512F-NEXT: retl
;
; X64-AVX512F-LABEL: elt1_v8f64:
; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
+; X64-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; X64-AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64-AVX512F-NEXT: retq
%ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1