diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2024-12-16 15:27:40 +0700 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2024-12-16 16:23:02 +0700 |
| commit | 3ea36fb381038e7be179ac40ff65a9d03df9a07b (patch) | |
| tree | fa089d5b865bc1b68dafc0a8257df26359242e7b | |
| parent | a3db5910b434d746c9c0585a092100ff7abcd1a0 (diff) | |
CodeGen: Treat subreg-to-subreg copies as isFullCopyInstrusers/arsenm/permit-subregisters-in-is-full-copy
This enables better copy folding during allocation.
I'm assuming the intent of this function is to identify copies
that do not change the register width (i.e. not subregister
insert or extract). Permit exact match subregisters since this
should result in a full copy for the final allocated copy.
This should probably be more permissive still. I think it should
accept any cases where getSubRegisterClass is the same for both
subregister indexes.
| -rw-r--r-- | llvm/include/llvm/CodeGen/TargetInstrInfo.h | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir | 447 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/mve-vst3.ll | 100 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/mve-vst4.ll | 12 |
9 files changed, 249 insertions, 391 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 408adcd330b8..0e8731a9c405 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1109,7 +1109,7 @@ public: const MachineOperand *DestRegOp = DestSrc->Destination; const MachineOperand *SrcRegOp = DestSrc->Source; - return !DestRegOp->getSubReg() && !SrcRegOp->getSubReg(); + return DestRegOp->getSubReg() == SrcRegOp->getSubReg(); } /// If the specific machine instruction is an instruction that adds an diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index 64f1f45bf734..e246b44611ae 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -7274,7 +7274,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v15 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v17 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v20 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v20 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v16 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14 @@ -7289,7 +7289,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v18 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v20 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v5 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v19 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v19 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v21 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 8f53ec2f992d..d3523587b589 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -272,38 +272,24 @@ body: | ; RA-NEXT: internal [[COPY]].sub13:sgpr_512 = COPY [[DEF2]].sub13 ; RA-NEXT: internal [[COPY]].sub14:sgpr_512 = COPY [[DEF2]].sub14 ; RA-NEXT: } - ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[COPY]].sub4_sub5 { - ; RA-NEXT: internal [[COPY1]].sub10_sub11:sgpr_512 = COPY [[COPY]].sub10_sub11 - ; RA-NEXT: internal [[COPY1]].sub7:sgpr_512 = COPY [[COPY]].sub7 - ; RA-NEXT: internal [[COPY1]].sub8:sgpr_512 = COPY [[COPY]].sub8 - ; RA-NEXT: internal [[COPY1]].sub13:sgpr_512 = COPY [[COPY]].sub13 - ; RA-NEXT: internal [[COPY1]].sub14:sgpr_512 = COPY [[COPY]].sub14 - ; RA-NEXT: } - ; RA-NEXT: SI_SPILL_S512_SAVE [[COPY1]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) + ; RA-NEXT: SI_SPILL_S512_SAVE [[COPY]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; RA-NEXT: undef [[COPY2:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { - ; RA-NEXT: internal [[COPY2]].sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 - ; RA-NEXT: internal [[COPY2]].sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 - ; RA-NEXT: internal [[COPY2]].sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 - ; RA-NEXT: internal [[COPY2]].sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 - ; RA-NEXT: internal [[COPY2]].sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 - ; RA-NEXT: } - ; RA-NEXT: undef [[COPY3:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[COPY2]].sub4_sub5 { - ; RA-NEXT: internal [[COPY3]].sub10_sub11:sgpr_512 = COPY [[COPY2]].sub10_sub11 - ; RA-NEXT: internal [[COPY3]].sub7:sgpr_512 = COPY [[COPY2]].sub7 - ; RA-NEXT: internal [[COPY3]].sub8:sgpr_512 = COPY [[COPY2]].sub8 - ; RA-NEXT: internal [[COPY3]].sub13:sgpr_512 = COPY [[COPY2]].sub13 - ; RA-NEXT: internal [[COPY3]].sub14:sgpr_512 = COPY [[COPY2]].sub14 + ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { + ; RA-NEXT: internal [[COPY1]].sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 + ; RA-NEXT: internal [[COPY1]].sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 + ; RA-NEXT: internal [[COPY1]].sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 + ; RA-NEXT: internal [[COPY1]].sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 + ; RA-NEXT: internal [[COPY1]].sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 ; RA-NEXT: } - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub4, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub5, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub10, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub11, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub7, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub8, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub13, 0 :: (dereferenceable invariant load (s32)) - ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub14, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub4, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub5, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub10, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub11, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub7, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub8, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub13, 0 :: (dereferenceable invariant load (s32)) + ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub14, 0 :: (dereferenceable invariant load (s32)) ; RA-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]] ; ; VR-LABEL: name: splitkit_copy_unbundle_reorder diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir index 42db92b15acf..2e6e15faa873 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -112,94 +112,64 @@ body: | ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY17]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_8]].sub2 ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_128 = COPY [[COPY18]].sub0 { - ; CHECK-NEXT: internal [[COPY19]].sub2:vreg_128 = COPY [[COPY18]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_9]].sub2 - ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef [[COPY21:%[0-9]+]].sub0:vreg_128 = COPY [[COPY20]].sub0 { - ; CHECK-NEXT: internal [[COPY21]].sub2:vreg_128 = COPY [[COPY20]].sub2 - ; CHECK-NEXT: } + ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_9]].sub2 + ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub0 { - ; CHECK-NEXT: internal [[COPY22]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY23:%[0-9]+]].sub0:vreg_128 = COPY [[COPY22]].sub0 { - ; CHECK-NEXT: internal [[COPY23]].sub2:vreg_128 = COPY [[COPY22]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY23]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY24:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_11]].sub2 - ; CHECK-NEXT: [[COPY24:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef [[COPY25:%[0-9]+]].sub0:vreg_128 = COPY [[COPY24]].sub0 { - ; CHECK-NEXT: internal [[COPY25]].sub2:vreg_128 = COPY [[COPY24]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY25]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY26:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub2 - ; CHECK-NEXT: [[COPY26:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub0:vreg_128 = COPY [[COPY26]].sub0 { - ; CHECK-NEXT: internal [[COPY27]].sub2:vreg_128 = COPY [[COPY26]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY27]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY28:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_13]].sub2 - ; CHECK-NEXT: [[COPY28:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_12:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_13]].sub2 + ; CHECK-NEXT: undef [[COPY21:%[0-9]+]].sub2:vreg_128 = COPY [[COPY20]].sub2 + ; CHECK-NEXT: [[COPY21:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0:vreg_128 = COPY [[COPY21]].sub0 { + ; CHECK-NEXT: internal [[COPY22]].sub2:vreg_128 = COPY [[COPY21]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY22]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY23:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_14]].sub2 + ; CHECK-NEXT: [[COPY23:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK-NEXT: undef [[COPY24:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_15]].sub2 + ; CHECK-NEXT: undef [[COPY25:%[0-9]+]].sub2:vreg_128 = COPY [[COPY24]].sub2 + ; CHECK-NEXT: [[COPY25:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef [[COPY26:%[0-9]+]].sub0:vreg_128 = COPY [[COPY25]].sub0 { + ; CHECK-NEXT: internal [[COPY26]].sub2:vreg_128 = COPY [[COPY25]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY26]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_16]].sub2 + ; CHECK-NEXT: undef [[COPY28:%[0-9]+]].sub2:vreg_128 = COPY [[COPY27]].sub2 + ; CHECK-NEXT: [[COPY28:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec ; CHECK-NEXT: undef [[COPY29:%[0-9]+]].sub0:vreg_128 = COPY [[COPY28]].sub0 { ; CHECK-NEXT: internal [[COPY29]].sub2:vreg_128 = COPY [[COPY28]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY30:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_14]].sub2 - ; CHECK-NEXT: [[COPY30:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub0:vreg_128 = COPY [[COPY30]].sub0 { - ; CHECK-NEXT: internal [[COPY31]].sub2:vreg_128 = COPY [[COPY30]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY31]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY32:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_15]].sub2 - ; CHECK-NEXT: [[COPY32:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK-NEXT: undef [[COPY33:%[0-9]+]].sub0:vreg_128 = COPY [[COPY32]].sub0 { - ; CHECK-NEXT: internal [[COPY33]].sub2:vreg_128 = COPY [[COPY32]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY34:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_16]].sub2 - ; CHECK-NEXT: [[COPY34:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY29]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY30:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_17]].sub2 + ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub2:vreg_128 = COPY [[COPY30]].sub2 + ; CHECK-NEXT: [[COPY31:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef [[COPY32:%[0-9]+]].sub0:vreg_128 = COPY [[COPY31]].sub0 { + ; CHECK-NEXT: internal [[COPY32]].sub2:vreg_128 = COPY [[COPY31]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY32]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY33:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_18]].sub2 + ; CHECK-NEXT: undef [[COPY34:%[0-9]+]].sub2:vreg_128 = COPY [[COPY33]].sub2 + ; CHECK-NEXT: [[COPY34:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec ; CHECK-NEXT: undef [[COPY35:%[0-9]+]].sub0:vreg_128 = COPY [[COPY34]].sub0 { ; CHECK-NEXT: internal [[COPY35]].sub2:vreg_128 = COPY [[COPY34]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY36:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_17]].sub2 - ; CHECK-NEXT: [[COPY36:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef [[COPY37:%[0-9]+]].sub0:vreg_128 = COPY [[COPY36]].sub0 { - ; CHECK-NEXT: internal [[COPY37]].sub2:vreg_128 = COPY [[COPY36]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY38:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_18]].sub2 - ; CHECK-NEXT: [[COPY38:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub0:vreg_128 = COPY [[COPY38]].sub0 { - ; CHECK-NEXT: internal [[COPY39]].sub2:vreg_128 = COPY [[COPY38]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY40:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_19]].sub2 - ; CHECK-NEXT: [[COPY40:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: undef [[COPY41:%[0-9]+]].sub0:vreg_128 = COPY [[COPY40]].sub0 { - ; CHECK-NEXT: internal [[COPY41]].sub2:vreg_128 = COPY [[COPY40]].sub2 - ; CHECK-NEXT: } + ; CHECK-NEXT: undef [[COPY36:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_19]].sub2 + ; CHECK-NEXT: [[COPY36:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_27:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef [[COPY42:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_27]].sub2 - ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef [[COPY43:%[0-9]+]].sub0:vreg_128 = COPY [[COPY42]].sub0 { - ; CHECK-NEXT: internal [[COPY43]].sub2:vreg_128 = COPY [[COPY42]].sub2 - ; CHECK-NEXT: } + ; CHECK-NEXT: undef [[COPY37:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_27]].sub2 + ; CHECK-NEXT: [[COPY37:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_28:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef [[COPY44:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_28]].sub2 - ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef [[COPY45:%[0-9]+]].sub0:vreg_128 = COPY [[COPY44]].sub0 { - ; CHECK-NEXT: internal [[COPY45]].sub2:vreg_128 = COPY [[COPY44]].sub2 - ; CHECK-NEXT: } + ; CHECK-NEXT: undef [[COPY38:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_28]].sub2 + ; CHECK-NEXT: [[COPY38:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_29:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef [[COPY46:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_29]].sub2 - ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef [[COPY47:%[0-9]+]].sub0:vreg_128 = COPY [[COPY46]].sub0 { - ; CHECK-NEXT: internal [[COPY47]].sub2:vreg_128 = COPY [[COPY46]].sub2 - ; CHECK-NEXT: } + ; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_29]].sub2 + ; CHECK-NEXT: [[COPY39:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_30:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_30]].sub2 - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK-NEXT: undef [[COPY40:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_30]].sub2 + ; CHECK-NEXT: [[COPY40:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_31:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec - ; CHECK-NEXT: undef [[COPY49:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_31]].sub2 - ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK-NEXT: undef [[COPY41:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_31]].sub2 + ; CHECK-NEXT: [[COPY41:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -228,237 +198,162 @@ body: | ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_20]], [[S_MOV_B32_]], 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub0:vreg_128 = COPY [[COPY49]].sub0 { - ; CHECK-NEXT: internal [[COPY50]].sub2:vreg_128 = COPY [[COPY49]].sub2 + ; CHECK-NEXT: undef [[COPY42:%[0-9]+]].sub0:vreg_128 = COPY [[COPY41]].sub0 { + ; CHECK-NEXT: internal [[COPY42]].sub2:vreg_128 = COPY [[COPY41]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY42]], [[S_MOV_B32_]], 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef [[COPY43:%[0-9]+]].sub0:vreg_128 = COPY [[COPY40]].sub0 { + ; CHECK-NEXT: internal [[COPY43]].sub2:vreg_128 = COPY [[COPY40]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY43:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY43:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY43]], [[S_MOV_B32_]], 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef [[COPY44:%[0-9]+]].sub0:vreg_128 = COPY [[COPY39]].sub0 { + ; CHECK-NEXT: internal [[COPY44]].sub2:vreg_128 = COPY [[COPY39]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY44]], [[S_MOV_B32_]], 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef [[COPY45:%[0-9]+]].sub0:vreg_128 = COPY [[COPY38]].sub0 { + ; CHECK-NEXT: internal [[COPY45]].sub2:vreg_128 = COPY [[COPY38]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY45:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY45:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY45]], [[S_MOV_B32_]], 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: undef [[COPY46:%[0-9]+]].sub0:vreg_128 = COPY [[COPY37]].sub0 { + ; CHECK-NEXT: internal [[COPY46]].sub2:vreg_128 = COPY [[COPY37]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY46]], [[S_MOV_B32_]], 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef [[COPY47:%[0-9]+]].sub0:vreg_128 = COPY [[COPY36]].sub0 { + ; CHECK-NEXT: internal [[COPY47]].sub2:vreg_128 = COPY [[COPY36]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY47:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY47:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY47]], [[S_MOV_B32_]], 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub0:vreg_128 = COPY [[COPY35]].sub0 { + ; CHECK-NEXT: internal [[COPY48]].sub2:vreg_128 = COPY [[COPY35]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY48]], [[S_MOV_B32_]], 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY49:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { + ; CHECK-NEXT: internal [[COPY49]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY49]], [[S_MOV_B32_]], 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { + ; CHECK-NEXT: internal [[COPY50]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY50]], [[S_MOV_B32_]], 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY51:%[0-9]+]].sub0:vreg_128 = COPY [[COPY48]].sub0 { - ; CHECK-NEXT: internal [[COPY51]].sub2:vreg_128 = COPY [[COPY48]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY50]], [[S_MOV_B32_]], 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY51:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { + ; CHECK-NEXT: internal [[COPY51]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: [[COPY51:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[COPY51:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY51]], [[S_MOV_B32_]], 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef [[COPY52:%[0-9]+]].sub0:vreg_128 = COPY [[COPY47]].sub0 { - ; CHECK-NEXT: internal [[COPY52]].sub2:vreg_128 = COPY [[COPY47]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY51]], [[S_MOV_B32_]], 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef [[COPY52:%[0-9]+]].sub0:vreg_128 = COPY [[COPY23]].sub0 { + ; CHECK-NEXT: internal [[COPY52]].sub2:vreg_128 = COPY [[COPY23]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY53:%[0-9]+]].sub0:vreg_128 = COPY [[COPY52]].sub0 { - ; CHECK-NEXT: internal [[COPY53]].sub2:vreg_128 = COPY [[COPY52]].sub2 + ; CHECK-NEXT: [[COPY52:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY52:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY52]], [[S_MOV_B32_]], 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY53:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { + ; CHECK-NEXT: internal [[COPY53]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: [[COPY53:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[COPY53:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY53]], [[S_MOV_B32_]], 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY54:%[0-9]+]].sub0:vreg_128 = COPY [[COPY45]].sub0 { - ; CHECK-NEXT: internal [[COPY54]].sub2:vreg_128 = COPY [[COPY45]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY55:%[0-9]+]].sub0:vreg_128 = COPY [[COPY54]].sub0 { - ; CHECK-NEXT: internal [[COPY55]].sub2:vreg_128 = COPY [[COPY54]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY53]], [[S_MOV_B32_]], 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: undef [[COPY54:%[0-9]+]].sub0:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub0 { + ; CHECK-NEXT: internal [[COPY54]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY54:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY54:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY54]], [[S_MOV_B32_]], 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_11]], [[S_MOV_B32_]], 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_10]], [[S_MOV_B32_]], 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef [[COPY55:%[0-9]+]].sub0:vreg_128 = COPY [[COPY19]].sub0 { + ; CHECK-NEXT: internal [[COPY55]].sub2:vreg_128 = COPY [[COPY19]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: [[COPY55:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[COPY55:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY55]], [[S_MOV_B32_]], 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef [[COPY56:%[0-9]+]].sub0:vreg_128 = COPY [[COPY43]].sub0 { - ; CHECK-NEXT: internal [[COPY56]].sub2:vreg_128 = COPY [[COPY43]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY55]], [[S_MOV_B32_]], 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) + ; CHECK-NEXT: undef [[COPY56:%[0-9]+]].sub0:vreg_128 = COPY [[COPY18]].sub0 { + ; CHECK-NEXT: internal [[COPY56]].sub2:vreg_128 = COPY [[COPY18]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY57:%[0-9]+]].sub0:vreg_128 = COPY [[COPY56]].sub0 { - ; CHECK-NEXT: internal [[COPY57]].sub2:vreg_128 = COPY [[COPY56]].sub2 + ; CHECK-NEXT: [[COPY56:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY56:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY56]], [[S_MOV_B32_]], 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) + ; CHECK-NEXT: undef [[COPY57:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { + ; CHECK-NEXT: internal [[COPY57]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: [[COPY57:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 ; CHECK-NEXT: [[COPY57:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY57]], [[S_MOV_B32_]], 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY58:%[0-9]+]].sub0:vreg_128 = COPY [[COPY41]].sub0 { - ; CHECK-NEXT: internal [[COPY58]].sub2:vreg_128 = COPY [[COPY41]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY59:%[0-9]+]].sub0:vreg_128 = COPY [[COPY58]].sub0 { - ; CHECK-NEXT: internal [[COPY59]].sub2:vreg_128 = COPY [[COPY58]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY59]], [[S_MOV_B32_]], 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef [[COPY60:%[0-9]+]].sub0:vreg_128 = COPY [[COPY39]].sub0 { - ; CHECK-NEXT: internal [[COPY60]].sub2:vreg_128 = COPY [[COPY39]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY61:%[0-9]+]].sub0:vreg_128 = COPY [[COPY60]].sub0 { - ; CHECK-NEXT: internal [[COPY61]].sub2:vreg_128 = COPY [[COPY60]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY61]], [[S_MOV_B32_]], 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY62:%[0-9]+]].sub0:vreg_128 = COPY [[COPY37]].sub0 { - ; CHECK-NEXT: internal [[COPY62]].sub2:vreg_128 = COPY [[COPY37]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY63:%[0-9]+]].sub0:vreg_128 = COPY [[COPY62]].sub0 { - ; CHECK-NEXT: internal [[COPY63]].sub2:vreg_128 = COPY [[COPY62]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY63]], [[S_MOV_B32_]], 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - ; CHECK-NEXT: undef [[COPY64:%[0-9]+]].sub0:vreg_128 = COPY [[COPY35]].sub0 { - ; CHECK-NEXT: internal [[COPY64]].sub2:vreg_128 = COPY [[COPY35]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY65:%[0-9]+]].sub0:vreg_128 = COPY [[COPY64]].sub0 { - ; CHECK-NEXT: internal [[COPY65]].sub2:vreg_128 = COPY [[COPY64]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY65]], [[S_MOV_B32_]], 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY66:%[0-9]+]].sub0:vreg_128 = COPY [[COPY33]].sub0 { - ; CHECK-NEXT: internal [[COPY66]].sub2:vreg_128 = COPY [[COPY33]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub0:vreg_128 = COPY [[COPY66]].sub0 { - ; CHECK-NEXT: internal [[COPY67]].sub2:vreg_128 = COPY [[COPY66]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY67]], [[S_MOV_B32_]], 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY68:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { - ; CHECK-NEXT: internal [[COPY68]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY69:%[0-9]+]].sub0:vreg_128 = COPY [[COPY68]].sub0 { - ; CHECK-NEXT: internal [[COPY69]].sub2:vreg_128 = COPY [[COPY68]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY69]], [[S_MOV_B32_]], 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY70:%[0-9]+]].sub0:vreg_128 = COPY [[COPY29]].sub0 { - ; CHECK-NEXT: internal [[COPY70]].sub2:vreg_128 = COPY [[COPY29]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY71:%[0-9]+]].sub0:vreg_128 = COPY [[COPY70]].sub0 { - ; CHECK-NEXT: internal [[COPY71]].sub2:vreg_128 = COPY [[COPY70]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY71]], [[S_MOV_B32_]], 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY72:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { - ; CHECK-NEXT: internal [[COPY72]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY73:%[0-9]+]].sub0:vreg_128 = COPY [[COPY72]].sub0 { - ; CHECK-NEXT: internal [[COPY73]].sub2:vreg_128 = COPY [[COPY72]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY73]], [[S_MOV_B32_]], 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY74:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { - ; CHECK-NEXT: internal [[COPY74]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY75:%[0-9]+]].sub0:vreg_128 = COPY [[COPY74]].sub0 { - ; CHECK-NEXT: internal [[COPY75]].sub2:vreg_128 = COPY [[COPY74]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY75]], [[S_MOV_B32_]], 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY76:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { - ; CHECK-NEXT: internal [[COPY76]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY77:%[0-9]+]].sub0:vreg_128 = COPY [[COPY76]].sub0 { - ; CHECK-NEXT: internal [[COPY77]].sub2:vreg_128 = COPY [[COPY76]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY77]], [[S_MOV_B32_]], 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef [[COPY78:%[0-9]+]].sub0:vreg_128 = COPY [[COPY21]].sub0 { - ; CHECK-NEXT: internal [[COPY78]].sub2:vreg_128 = COPY [[COPY21]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY79:%[0-9]+]].sub0:vreg_128 = COPY [[COPY78]].sub0 { - ; CHECK-NEXT: internal [[COPY79]].sub2:vreg_128 = COPY [[COPY78]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY79]], [[S_MOV_B32_]], 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef [[COPY80:%[0-9]+]].sub0:vreg_128 = COPY [[COPY19]].sub0 { - ; CHECK-NEXT: internal [[COPY80]].sub2:vreg_128 = COPY [[COPY19]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY81:%[0-9]+]].sub0:vreg_128 = COPY [[COPY80]].sub0 { - ; CHECK-NEXT: internal [[COPY81]].sub2:vreg_128 = COPY [[COPY80]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY81]], [[S_MOV_B32_]], 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY82:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { - ; CHECK-NEXT: internal [[COPY82]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY83:%[0-9]+]].sub0:vreg_128 = COPY [[COPY82]].sub0 { - ; CHECK-NEXT: internal [[COPY83]].sub2:vreg_128 = COPY [[COPY82]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY83]], [[S_MOV_B32_]], 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY57]], [[S_MOV_B32_]], 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY84:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 { - ; CHECK-NEXT: internal [[COPY84]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY85:%[0-9]+]].sub0:vreg_128 = COPY [[COPY84]].sub0 { - ; CHECK-NEXT: internal [[COPY85]].sub2:vreg_128 = COPY [[COPY84]].sub2 + ; CHECK-NEXT: undef [[COPY58:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 { + ; CHECK-NEXT: internal [[COPY58]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY85]], [[S_MOV_B32_]], 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[COPY58:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY58:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY58]], [[S_MOV_B32_]], 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY86:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { - ; CHECK-NEXT: internal [[COPY86]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 + ; CHECK-NEXT: undef [[COPY59:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { + ; CHECK-NEXT: internal [[COPY59]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY87:%[0-9]+]].sub0:vreg_128 = COPY [[COPY86]].sub0 { - ; CHECK-NEXT: internal [[COPY87]].sub2:vreg_128 = COPY [[COPY86]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY87]], [[S_MOV_B32_]], 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY59]], [[S_MOV_B32_]], 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY88:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { - ; CHECK-NEXT: internal [[COPY88]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 + ; CHECK-NEXT: undef [[COPY60:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { + ; CHECK-NEXT: internal [[COPY60]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY89:%[0-9]+]].sub0:vreg_128 = COPY [[COPY88]].sub0 { - ; CHECK-NEXT: internal [[COPY89]].sub2:vreg_128 = COPY [[COPY88]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY89]], [[S_MOV_B32_]], 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[COPY60:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY60:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY60]], [[S_MOV_B32_]], 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY90:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { - ; CHECK-NEXT: internal [[COPY90]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY91:%[0-9]+]].sub0:vreg_128 = COPY [[COPY90]].sub0 { - ; CHECK-NEXT: internal [[COPY91]].sub2:vreg_128 = COPY [[COPY90]].sub2 + ; CHECK-NEXT: undef [[COPY61:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { + ; CHECK-NEXT: internal [[COPY61]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY91]], [[S_MOV_B32_]], 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY61]], [[S_MOV_B32_]], 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY92:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { - ; CHECK-NEXT: internal [[COPY92]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY93:%[0-9]+]].sub0:vreg_128 = COPY [[COPY92]].sub0 { - ; CHECK-NEXT: internal [[COPY93]].sub2:vreg_128 = COPY [[COPY92]].sub2 + ; CHECK-NEXT: undef [[COPY62:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { + ; CHECK-NEXT: internal [[COPY62]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY93]], [[S_MOV_B32_]], 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[COPY62:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY62:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY62]], [[S_MOV_B32_]], 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY94:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { - ; CHECK-NEXT: internal [[COPY94]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY95:%[0-9]+]].sub0:vreg_128 = COPY [[COPY94]].sub0 { - ; CHECK-NEXT: internal [[COPY95]].sub2:vreg_128 = COPY [[COPY94]].sub2 + ; CHECK-NEXT: undef [[COPY63:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { + ; CHECK-NEXT: internal [[COPY63]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY95]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) + ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY63]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY96:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { - ; CHECK-NEXT: internal [[COPY96]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: undef [[COPY97:%[0-9]+]].sub0:vreg_128 = COPY [[COPY96]].sub0 { - ; CHECK-NEXT: internal [[COPY97]].sub2:vreg_128 = COPY [[COPY96]].sub2 + ; CHECK-NEXT: undef [[COPY64:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { + ; CHECK-NEXT: internal [[COPY64]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY97]], [[S_MOV_B32_]], 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[COPY64:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: [[COPY64:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY64]], [[S_MOV_B32_]], 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr_64(p4) = COPY $sgpr0_sgpr1 %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll index 07839a43331f..8044bf2f6b45 100644 --- a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll +++ b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll @@ -231,8 +231,8 @@ define void @add_e_i8(i8 signext %0, i8 signext %1) { ; CHECK-NEXT: mov r26, r26 ; CHECK-NEXT: add r26, r30 ; CHECK-NEXT: ;NO_APP -; CHECK-NEXT: mov r20, r30 ; CHECK-NEXT: mov r24, r26 +; CHECK-NEXT: mov r20, r30 ; CHECK-NEXT: rcall foo8 ; CHECK-NEXT: ret %3 = tail call i8 asm sideeffect "mov $0, $1\0Aadd $0, $2", "=e,e,e"(i8 %0, i8 %1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index a91dee1cb245..46c190532052 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -411,8 +411,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: slli a4, a4, 4 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v16 +; CHECK-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 ; CHECK-NEXT: add a4, sp, a4 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index 7ca1983e8b32..7f65a0d0c50e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -927,13 +927,12 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a4, a2, 4 ; CHECK-NEXT: add a2, a4, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 ; CHECK-NEXT: csrr a2, vlenb @@ -1080,13 +1079,12 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat> ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a4, a2, 4 ; CHECK-NEXT: add a2, a4, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 ; CHECK-NEXT: csrr a2, vlenb @@ -1221,12 +1219,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 ; CHECK-NEXT: addi a2, sp, 16 @@ -1324,12 +1321,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 ; CHECK-NEXT: addi a2, sp, 16 @@ -2505,13 +2501,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a4, a2, 4 ; ZVFHMIN-NEXT: add a2, a4, a2 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a2, vlenb @@ -2664,13 +2659,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a4, a2, 4 ; ZVFHMIN-NEXT: add a2, a4, a2 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a2, vlenb @@ -2811,12 +2805,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: addi a2, sp, 16 @@ -2920,12 +2913,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: addi a2, sp, 16 @@ -12499,7 +12491,6 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t ; ZVFHMIN-NEXT: slli a2, a1, 1 ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: vmv4r.v v20, v28 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -12507,7 +12498,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index 85317e1fe462..51b52be9a84d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -405,31 +405,28 @@ define void @vst3_v16i16(ptr %src, ptr %dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #48 ; CHECK-NEXT: sub sp, #48 -; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vldrw.u32 q1, [r0, #32] +; CHECK-NEXT: vldrw.u32 q6, [r0] +; CHECK-NEXT: vldrw.u32 q2, [r0, #64] ; CHECK-NEXT: vldrw.u32 q7, [r0, #80] -; CHECK-NEXT: vmov.f32 s0, s11 +; CHECK-NEXT: vmov.f32 s0, s27 ; CHECK-NEXT: vmov.u16 r2, q1[5] ; CHECK-NEXT: vmov.16 q3[0], r2 ; CHECK-NEXT: vins.f16 s0, s7 -; CHECK-NEXT: vmov.f32 s2, s11 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.f64 d12, d4 -; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f32 s26, s10 -; CHECK-NEXT: vldrw.u32 q2, [r0, #64] ; CHECK-NEXT: vmov.f32 s13, s0 -; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.u16 r2, q1[7] ; CHECK-NEXT: vmov.16 q3[6], r2 ; CHECK-NEXT: vmovx.f16 s0, s10 ; CHECK-NEXT: vins.f16 s12, s0 -; CHECK-NEXT: vmovx.f16 s0, s2 +; CHECK-NEXT: vmovx.f16 s0, s27 ; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vins.f16 s14, s0 -; CHECK-NEXT: vmov.f32 s20, s7 +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] ; CHECK-NEXT: vmov q0, q3 ; CHECK-NEXT: vldrw.u32 q3, [r0, #48] +; CHECK-NEXT: vmov.f32 s20, s7 +; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill ; CHECK-NEXT: vmov.u16 r2, q3[5] ; CHECK-NEXT: vins.f16 s20, s15 ; CHECK-NEXT: vmov.16 q4[0], r2 @@ -472,29 +469,27 @@ define void @vst3_v16i16(ptr %src, ptr %dst) { ; CHECK-NEXT: vmov.u16 r0, q3[3] ; CHECK-NEXT: vins.f16 s4, s14 ; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vins.f16 s26, s8 +; CHECK-NEXT: vmov.f32 s18, s31 ; CHECK-NEXT: vmov.f32 s2, s4 ; CHECK-NEXT: vmovx.f16 s4, s29 +; CHECK-NEXT: vmovx.f16 s0, s5 ; CHECK-NEXT: vins.f16 s1, s4 ; CHECK-NEXT: vmovx.f16 s4, s6 -; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vins.f16 s30, s4 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload ; CHECK-NEXT: vins.f16 s29, s0 +; CHECK-NEXT: vins.f16 s30, s4 +; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s25, s28 +; CHECK-NEXT: vins.f16 s26, s8 ; CHECK-NEXT: vmov.f32 s0, s29 -; CHECK-NEXT: vins.f16 s22, s11 +; CHECK-NEXT: vmov.u16 r0, q1[3] ; CHECK-NEXT: vmov.f32 s3, s30 -; CHECK-NEXT: vstrw.32 q5, [r1] -; CHECK-NEXT: vmov.f32 s29, s5 +; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload +; CHECK-NEXT: vins.f16 s22, s11 +; CHECK-NEXT: vstrw.32 q6, [r1, #48] +; CHECK-NEXT: vmov.f32 s8, s30 ; CHECK-NEXT: vstrw.32 q0, [r1, #64] -; CHECK-NEXT: vmov.f32 s30, s6 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s31 -; CHECK-NEXT: vmov.u16 r0, q1[3] ; CHECK-NEXT: vins.f16 s8, s6 ; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.f32 s25, s28 ; CHECK-NEXT: vmov.f32 s6, s8 ; CHECK-NEXT: vmovx.f16 s8, s9 ; CHECK-NEXT: vmovx.f16 s4, s29 @@ -504,10 +499,10 @@ define void @vst3_v16i16(ptr %src, ptr %dst) { ; CHECK-NEXT: vins.f16 s10, s8 ; CHECK-NEXT: vmov.f32 s4, s9 ; CHECK-NEXT: vmov.f32 s7, s10 -; CHECK-NEXT: vstrw.32 q6, [r1, #48] +; CHECK-NEXT: vstrw.32 q4, [r1, #80] ; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q4, [r1, #80] +; CHECK-NEXT: vstrw.32 q5, [r1] ; CHECK-NEXT: vstrw.32 q1, [r1, #32] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -1043,10 +1038,10 @@ define void @vst3_v8f32(ptr %src, ptr %dst) { ; CHECK-NEXT: vstrw.32 q5, [r1] ; CHECK-NEXT: vmov.f32 s10, s19 ; CHECK-NEXT: vmov.f32 s11, s31 -; CHECK-NEXT: vmov.f32 s5, s29 -; CHECK-NEXT: vstrw.32 q2, [r1, #32] ; CHECK-NEXT: vmov.f32 s4, s17 +; CHECK-NEXT: vstrw.32 q2, [r1, #32] ; CHECK-NEXT: vmov.f32 s7, s18 +; CHECK-NEXT: vmov.f32 s5, s29 ; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -1332,22 +1327,24 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #48 ; CHECK-NEXT: sub sp, #48 -; CHECK-NEXT: vldrw.u32 q3, [r0, #16] +; CHECK-NEXT: vldrw.u32 q5, [r0, #16] ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] ; CHECK-NEXT: vldrw.u32 q6, [r0, #32] -; CHECK-NEXT: vmov.f32 s8, s12 +; CHECK-NEXT: vmov.f32 s8, s20 ; CHECK-NEXT: vmovx.f16 s2, s4 -; CHECK-NEXT: vmov.f32 s0, s13 +; CHECK-NEXT: vmov.f32 s0, s21 ; CHECK-NEXT: vins.f16 s8, s4 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vins.f16 s0, s5 ; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov q4, q3 +; CHECK-NEXT: vmov.f64 d15, d13 ; CHECK-NEXT: vmov.f32 s11, s0 -; CHECK-NEXT: vmovx.f16 s0, s16 +; CHECK-NEXT: vmovx.f16 s0, s20 ; CHECK-NEXT: vmov.f32 s12, s8 -; CHECK-NEXT: vmov.f64 d11, d9 -; CHECK-NEXT: vmov.f32 s21, s17 +; CHECK-NEXT: vmov.f32 s29, s25 +; CHECK-NEXT: vmov.f32 s4, s23 +; CHECK-NEXT: vstrw.32 q7, [sp] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s4, s7 ; CHECK-NEXT: vmov.f64 d7, d5 ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] ; CHECK-NEXT: vmovx.f16 s2, s8 @@ -1364,7 +1361,6 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vins.f16 s0, s25 ; CHECK-NEXT: vmov.f32 s19, s0 ; CHECK-NEXT: vmovx.f16 s0, s12 -; CHECK-NEXT: vmov.f64 d15, d13 ; CHECK-NEXT: vmov.f32 s17, s13 ; CHECK-NEXT: vmov.f32 s24, s16 ; CHECK-NEXT: vmov.f64 d13, d9 @@ -1378,30 +1374,28 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vstrw.32 q6, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vins.f16 s0, s31 -; CHECK-NEXT: vmov.f32 s29, s25 ; CHECK-NEXT: vmov.16 q6[0], r0 +; CHECK-NEXT: vmovx.f16 s2, s15 ; CHECK-NEXT: vmov.f32 s25, s0 ; CHECK-NEXT: vmovx.f16 s0, s31 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vmovx.f16 s0, s14 ; CHECK-NEXT: vmov.16 q6[6], r0 -; CHECK-NEXT: vmovx.f16 s2, s15 +; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vins.f16 s24, s0 ; CHECK-NEXT: vmovx.f16 s0, s19 ; CHECK-NEXT: vins.f16 s15, s0 ; CHECK-NEXT: vmovx.f16 s0, s6 -; CHECK-NEXT: vmov.f32 s4, s23 -; CHECK-NEXT: vins.f16 s27, s2 ; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vins.f16 s4, s7 +; CHECK-NEXT: vins.f16 s27, s2 ; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vstrw.32 q7, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s29, s12 ; CHECK-NEXT: vmov.f32 s1, s4 ; CHECK-NEXT: vmovx.f16 s4, s7 ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmovx.f16 s4, s10 ; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmovx.f16 s12, s9 ; CHECK-NEXT: vins.f16 s0, s4 ; CHECK-NEXT: vmovx.f16 s4, s11 ; CHECK-NEXT: vmovx.f16 s2, s23 @@ -1409,21 +1403,20 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vmovx.f16 s4, s5 ; CHECK-NEXT: vins.f16 s11, s2 ; CHECK-NEXT: vmov.f32 s2, s22 +; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vins.f16 s2, s6 ; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.f32 s29, s12 +; CHECK-NEXT: vmov.f32 s26, s15 ; CHECK-NEXT: vmovx.f16 s4, s21 -; CHECK-NEXT: vmovx.f16 s12, s9 +; CHECK-NEXT: vmov.f32 s21, s17 ; CHECK-NEXT: vins.f16 s9, s4 ; CHECK-NEXT: vmovx.f16 s4, s22 ; CHECK-NEXT: vins.f16 s10, s4 -; CHECK-NEXT: vmov.f32 s21, s17 ; CHECK-NEXT: vmov.f32 s22, s18 -; CHECK-NEXT: vins.f16 s5, s12 ; CHECK-NEXT: vmov.f32 s4, s18 ; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s5, s12 ; CHECK-NEXT: vmov.f32 s6, s2 ; CHECK-NEXT: vmovx.f16 s12, s17 ; CHECK-NEXT: vins.f16 s4, s18 @@ -1439,17 +1432,16 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r1, #80] ; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s26, s15 -; CHECK-NEXT: vins.f16 s29, s12 ; CHECK-NEXT: vmov.f32 s21, s8 -; CHECK-NEXT: vstrw.32 q6, [r1, #32] +; CHECK-NEXT: vins.f16 s29, s12 ; CHECK-NEXT: vmov.f32 s4, s9 -; CHECK-NEXT: vstrw.32 q5, [r1, #48] +; CHECK-NEXT: vstrw.32 q6, [r1, #32] ; CHECK-NEXT: vmov.f32 s7, s10 -; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vstrw.32 q5, [r1, #48] ; CHECK-NEXT: vmov.f32 s28, s13 ; CHECK-NEXT: vstrw.32 q1, [r1, #64] ; CHECK-NEXT: vmov.f32 s31, s14 +; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vstrw.32 q7, [r1, #16] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll index b36904495e87..013875ab0348 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -119,22 +119,20 @@ define void @vst4_v16i32(ptr %src, ptr %dst) { ; CHECK-NEXT: sub sp, #192 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] ; CHECK-NEXT: add r2, sp, #64 -; CHECK-NEXT: vldrw.u32 q4, [r0, #176] ; CHECK-NEXT: vldrw.u32 q3, [r0, #208] ; CHECK-NEXT: vldrw.u32 q2, [r0, #144] ; CHECK-NEXT: vldrw.u32 q1, [r0, #80] ; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vldrw.u32 q2, [r0, #128] -; CHECK-NEXT: vldrw.u32 q5, [r0, #240] -; CHECK-NEXT: vmov q6, q4 ; CHECK-NEXT: vldrw.u32 q3, [r0, #192] ; CHECK-NEXT: vldrw.u32 q1, [r0, #64] ; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill +; CHECK-NEXT: vldrw.u32 q6, [r0, #176] ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] ; CHECK-NEXT: vldrw.u32 q4, [r0, #48] ; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vmov q7, q5 +; CHECK-NEXT: vldrw.u32 q7, [r0, #240] ; CHECK-NEXT: vldrw.u32 q3, [r0, #224] ; CHECK-NEXT: vldrw.u32 q1, [r0, #96] ; CHECK-NEXT: vldrw.u32 q5, [r0, #112] @@ -885,22 +883,20 @@ define void @vst4_v16f32(ptr %src, ptr %dst) { ; CHECK-NEXT: sub sp, #192 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] ; CHECK-NEXT: add r2, sp, #64 -; CHECK-NEXT: vldrw.u32 q4, [r0, #176] ; CHECK-NEXT: vldrw.u32 q3, [r0, #208] ; CHECK-NEXT: vldrw.u32 q2, [r0, #144] ; CHECK-NEXT: vldrw.u32 q1, [r0, #80] ; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vldrw.u32 q2, [r0, #128] -; CHECK-NEXT: vldrw.u32 q5, [r0, #240] -; CHECK-NEXT: vmov q6, q4 ; CHECK-NEXT: vldrw.u32 q3, [r0, #192] ; CHECK-NEXT: vldrw.u32 q1, [r0, #64] ; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill +; CHECK-NEXT: vldrw.u32 q6, [r0, #176] ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] ; CHECK-NEXT: vldrw.u32 q4, [r0, #48] ; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vmov q7, q5 +; CHECK-NEXT: vldrw.u32 q7, [r0, #240] ; CHECK-NEXT: vldrw.u32 q3, [r0, #224] ; CHECK-NEXT: vldrw.u32 q1, [r0, #96] ; CHECK-NEXT: vldrw.u32 q5, [r0, #112] |
