summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-constant-i8.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i8.ll430
1 files changed, 211 insertions, 219 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index 889755c23bbc..9000cee7ef9d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -4093,84 +4093,80 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v0, 8, s15
; GFX12-NEXT: v_lshrrev_b16 v9, 8, s2
; GFX12-NEXT: v_lshrrev_b16 v5, 8, s1
-; GFX12-NEXT: s_lshr_b32 s34, s15, 24
-; GFX12-NEXT: s_and_b32 s50, s15, 0xff
-; GFX12-NEXT: s_bfe_u32 s15, s15, 0x80010
-; GFX12-NEXT: s_lshr_b32 s27, s9, 24
; GFX12-NEXT: v_lshrrev_b16 v2, 8, s14
+; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4
+; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3
; GFX12-NEXT: v_lshrrev_b16 v3, 8, s13
-; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11
-; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9
; GFX12-NEXT: v_lshrrev_b16 v12, 8, s6
; GFX12-NEXT: v_lshrrev_b16 v14, 8, s5
-; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4
-; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3
-; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0
-; GFX12-NEXT: v_dual_mov_b32 v52, 0 :: v_dual_and_b32 v5, 0xffff, v5
-; GFX12-NEXT: v_dual_mov_b32 v48, s50 :: v_dual_and_b32 v9, 0xffff, v9
-; GFX12-NEXT: v_mov_b32_e32 v50, s15
-; GFX12-NEXT: s_and_b32 s44, s9, 0xff
-; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_dual_mov_b32 v36, s9 :: v_dual_and_b32 v49, 0xffff, v0
-; GFX12-NEXT: v_mov_b32_e32 v51, s34
+; GFX12-NEXT: s_lshr_b32 s34, s15, 24
; GFX12-NEXT: v_lshrrev_b16 v4, 8, s12
; GFX12-NEXT: v_lshrrev_b16 v10, 8, s8
; GFX12-NEXT: v_lshrrev_b16 v11, 8, s7
-; GFX12-NEXT: s_lshr_b32 s31, s13, 24
+; GFX12-NEXT: s_and_b32 s50, s15, 0xff
+; GFX12-NEXT: s_bfe_u32 s15, s15, 0x80010
; GFX12-NEXT: s_lshr_b32 s33, s14, 24
-; GFX12-NEXT: s_and_b32 s43, s8, 0xff
-; GFX12-NEXT: s_and_b32 s48, s13, 0xff
; GFX12-NEXT: s_and_b32 s49, s14, 0xff
; GFX12-NEXT: s_bfe_u32 s14, s14, 0x80010
+; GFX12-NEXT: s_lshr_b32 s26, s8, 24
+; GFX12-NEXT: s_lshr_b32 s31, s13, 24
+; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11
+; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10
+; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9
+; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0
+; GFX12-NEXT: v_dual_mov_b32 v60, 0 :: v_dual_and_b32 v5, 0xffff, v5
+; GFX12-NEXT: v_dual_mov_b32 v56, s50 :: v_dual_and_b32 v9, 0xffff, v9
+; GFX12-NEXT: v_mov_b32_e32 v58, s15
+; GFX12-NEXT: s_and_b32 s43, s8, 0xff
+; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010
+; GFX12-NEXT: s_and_b32 s48, s13, 0xff
; GFX12-NEXT: s_bfe_u32 s13, s13, 0x80010
+; GFX12-NEXT: v_dual_mov_b32 v30, s43 :: v_dual_and_b32 v57, 0xffff, v0
+; GFX12-NEXT: v_dual_mov_b32 v59, s34 :: v_dual_mov_b32 v32, s8
+; GFX12-NEXT: s_lshr_b32 s27, s9, 24
; GFX12-NEXT: s_lshr_b32 s30, s12, 24
+; GFX12-NEXT: v_dual_mov_b32 v52, s49 :: v_dual_and_b32 v13, 0xffff, v13
+; GFX12-NEXT: v_dual_mov_b32 v54, s14 :: v_dual_and_b32 v17, 0xffff, v15
+; GFX12-NEXT: s_and_b32 s42, s7, 0xff
+; GFX12-NEXT: s_and_b32 s44, s9, 0xff
+; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010
; GFX12-NEXT: s_and_b32 s47, s12, 0xff
; GFX12-NEXT: s_bfe_u32 s12, s12, 0x80010
-; GFX12-NEXT: s_lshr_b32 s26, s8, 24
-; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX12-NEXT: v_dual_mov_b32 v44, s49 :: v_dual_and_b32 v13, 0xffff, v13
-; GFX12-NEXT: v_dual_mov_b32 v46, s14 :: v_dual_and_b32 v17, 0xffff, v15
-; GFX12-NEXT: v_and_b32_e32 v21, 0xffff, v14
-; GFX12-NEXT: v_and_b32_e32 v23, 0xffff, v12
-; GFX12-NEXT: s_and_b32 s42, s7, 0xff
-; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010
-; GFX12-NEXT: v_dual_mov_b32 v34, s44 :: v_dual_and_b32 v45, 0xffff, v2
-; GFX12-NEXT: v_dual_mov_b32 v47, s33 :: v_dual_mov_b32 v30, s43
-; GFX12-NEXT: global_store_b128 v52, v[48:51], s[16:17] offset:240
-; GFX12-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_and_b32 v49, 0xffff, v3
-; GFX12-NEXT: v_mov_b32_e32 v48, s48
-; GFX12-NEXT: v_dual_mov_b32 v50, s13 :: v_dual_mov_b32 v51, s31
-; GFX12-NEXT: v_mov_b32_e32 v26, s42
+; GFX12-NEXT: v_dual_mov_b32 v36, s9 :: v_dual_and_b32 v53, 0xffff, v2
+; GFX12-NEXT: v_dual_mov_b32 v55, s33 :: v_dual_mov_b32 v26, s42
; GFX12-NEXT: s_lshr_b32 s25, s7, 24
-; GFX12-NEXT: s_lshr_b32 s28, s10, 24
-; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10
-; GFX12-NEXT: v_dual_mov_b32 v40, s47 :: v_dual_and_b32 v27, 0xffff, v11
+; GFX12-NEXT: v_dual_mov_b32 v48, s48 :: v_dual_and_b32 v21, 0xffff, v14
+; GFX12-NEXT: v_dual_mov_b32 v50, s13 :: v_dual_and_b32 v23, 0xffff, v12
; GFX12-NEXT: s_bfe_u32 s7, s7, 0x80010
-; GFX12-NEXT: v_dual_mov_b32 v42, s12 :: v_dual_and_b32 v31, 0xffff, v10
+; GFX12-NEXT: v_dual_mov_b32 v34, s44 :: v_dual_and_b32 v49, 0xffff, v3
+; GFX12-NEXT: v_dual_mov_b32 v51, s31 :: v_dual_mov_b32 v28, s7
+; GFX12-NEXT: s_lshr_b32 s28, s10, 24
+; GFX12-NEXT: s_lshr_b32 s29, s11, 24
+; GFX12-NEXT: s_and_b32 s41, s6, 0xff
+; GFX12-NEXT: v_dual_mov_b32 v44, s47 :: v_dual_and_b32 v27, 0xffff, v11
+; GFX12-NEXT: v_dual_mov_b32 v46, s12 :: v_dual_and_b32 v31, 0xffff, v10
; GFX12-NEXT: s_and_b32 s45, s10, 0xff
; GFX12-NEXT: s_bfe_u32 s10, s10, 0x80010
-; GFX12-NEXT: v_and_b32_e32 v41, 0xffff, v4
-; GFX12-NEXT: v_dual_mov_b32 v43, s30 :: v_dual_mov_b32 v28, s7
-; GFX12-NEXT: s_lshr_b32 s29, s11, 24
; GFX12-NEXT: s_and_b32 s46, s11, 0xff
+; GFX12-NEXT: v_dual_mov_b32 v40, s10 :: v_dual_and_b32 v45, 0xffff, v4
+; GFX12-NEXT: v_dual_mov_b32 v47, s30 :: v_dual_mov_b32 v22, s41
; GFX12-NEXT: s_bfe_u32 s11, s11, 0x80010
; GFX12-NEXT: s_lshr_b32 s24, s6, 24
-; GFX12-NEXT: s_and_b32 s41, s6, 0xff
+; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010
-; GFX12-NEXT: v_and_b32_e32 v35, 0xffff, v8
-; GFX12-NEXT: global_store_b128 v52, v[44:47], s[16:17] offset:224
-; GFX12-NEXT: v_mov_b32_e32 v46, s29
-; GFX12-NEXT: v_and_b32_e32 v44, 0xffff, v6
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: global_store_b128 v52, v[48:51], s[16:17] offset:208
-; GFX12-NEXT: global_store_b128 v52, v[40:43], s[16:17] offset:192
-; GFX12-NEXT: v_dual_mov_b32 v43, s46 :: v_dual_mov_b32 v22, s41
-; GFX12-NEXT: v_dual_mov_b32 v45, s11 :: v_dual_mov_b32 v24, s6
+; GFX12-NEXT: v_dual_mov_b32 v42, s46 :: v_dual_and_b32 v35, 0xffff, v8
+; GFX12-NEXT: v_and_b32_e32 v39, 0xffff, v7
+; GFX12-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_and_b32 v43, 0xffff, v6
+; GFX12-NEXT: s_clause 0x3
+; GFX12-NEXT: global_store_b128 v60, v[56:59], s[16:17] offset:240
+; GFX12-NEXT: global_store_b128 v60, v[52:55], s[16:17] offset:224
+; GFX12-NEXT: global_store_b128 v60, v[48:51], s[16:17] offset:208
+; GFX12-NEXT: global_store_b128 v60, v[44:47], s[16:17] offset:192
+; GFX12-NEXT: v_dual_mov_b32 v44, s11 :: v_dual_mov_b32 v45, s29
+; GFX12-NEXT: v_mov_b32_e32 v24, s6
; GFX12-NEXT: s_and_b32 s40, s5, 0xff
-; GFX12-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_and_b32 v39, 0xffff, v7
-; GFX12-NEXT: v_dual_mov_b32 v40, s10 :: v_dual_mov_b32 v41, s28
-; GFX12-NEXT: v_mov_b32_e32 v20, s40
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_dual_mov_b32 v41, s28 :: v_dual_mov_b32 v20, s40
; GFX12-NEXT: s_lshr_b32 s23, s5, 24
; GFX12-NEXT: s_bfe_u32 s5, s5, 0x80010
; GFX12-NEXT: v_mov_b32_e32 v37, s27
@@ -4184,12 +4180,12 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s3, s3, 0x80010
; GFX12-NEXT: v_dual_mov_b32 v25, s24 :: v_dual_mov_b32 v12, s38
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v52, v[43:46], s[16:17] offset:176
-; GFX12-NEXT: global_store_b128 v52, v[38:41], s[16:17] offset:160
-; GFX12-NEXT: global_store_b128 v52, v[34:37], s[16:17] offset:144
-; GFX12-NEXT: global_store_b128 v52, v[30:33], s[16:17] offset:128
-; GFX12-NEXT: global_store_b128 v52, v[26:29], s[16:17] offset:112
-; GFX12-NEXT: global_store_b128 v52, v[22:25], s[16:17] offset:96
+; GFX12-NEXT: global_store_b128 v60, v[42:45], s[16:17] offset:176
+; GFX12-NEXT: global_store_b128 v60, v[38:41], s[16:17] offset:160
+; GFX12-NEXT: global_store_b128 v60, v[34:37], s[16:17] offset:144
+; GFX12-NEXT: global_store_b128 v60, v[30:33], s[16:17] offset:128
+; GFX12-NEXT: global_store_b128 v60, v[26:29], s[16:17] offset:112
+; GFX12-NEXT: global_store_b128 v60, v[22:25], s[16:17] offset:96
; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v23, s23
; GFX12-NEXT: v_mov_b32_e32 v14, s3
; GFX12-NEXT: s_lshr_b32 s20, s2, 24
@@ -4208,12 +4204,12 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_dual_mov_b32 v0, s35 :: v_dual_mov_b32 v3, s18
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v52, v[20:23], s[16:17] offset:80
-; GFX12-NEXT: global_store_b128 v52, v[16:19], s[16:17] offset:64
-; GFX12-NEXT: global_store_b128 v52, v[12:15], s[16:17] offset:48
-; GFX12-NEXT: global_store_b128 v52, v[8:11], s[16:17] offset:32
-; GFX12-NEXT: global_store_b128 v52, v[4:7], s[16:17] offset:16
-; GFX12-NEXT: global_store_b128 v52, v[0:3], s[16:17]
+; GFX12-NEXT: global_store_b128 v60, v[20:23], s[16:17] offset:80
+; GFX12-NEXT: global_store_b128 v60, v[16:19], s[16:17] offset:64
+; GFX12-NEXT: global_store_b128 v60, v[12:15], s[16:17] offset:48
+; GFX12-NEXT: global_store_b128 v60, v[8:11], s[16:17] offset:32
+; GFX12-NEXT: global_store_b128 v60, v[4:7], s[16:17] offset:16
+; GFX12-NEXT: global_store_b128 v60, v[0:3], s[16:17]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
@@ -5056,97 +5052,95 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_lshrrev_b16 v0, 8, s15
-; GFX12-NEXT: s_ashr_i32 s49, s15, 24
-; GFX12-NEXT: s_bfe_i32 s50, s15, 0x80010
-; GFX12-NEXT: s_sext_i32_i8 s15, s15
; GFX12-NEXT: v_lshrrev_b16 v2, 8, s14
; GFX12-NEXT: v_lshrrev_b16 v3, 8, s13
-; GFX12-NEXT: v_dual_mov_b32 v54, 0 :: v_dual_mov_b32 v45, s49
; GFX12-NEXT: v_lshrrev_b16 v4, 8, s12
-; GFX12-NEXT: s_ashr_i32 s45, s13, 24
+; GFX12-NEXT: s_ashr_i32 s49, s15, 24
+; GFX12-NEXT: s_bfe_i32 s50, s15, 0x80010
+; GFX12-NEXT: s_sext_i32_i8 s15, s15
; GFX12-NEXT: s_ashr_i32 s47, s14, 24
-; GFX12-NEXT: v_bfe_i32 v43, v0, 0, 8
-; GFX12-NEXT: v_dual_mov_b32 v42, s15 :: v_dual_mov_b32 v49, s47
-; GFX12-NEXT: v_dual_mov_b32 v44, s50 :: v_dual_mov_b32 v53, s45
-; GFX12-NEXT: s_bfe_i32 s46, s13, 0x80010
-; GFX12-NEXT: s_sext_i32_i8 s13, s13
; GFX12-NEXT: s_bfe_i32 s48, s14, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s14, s14
+; GFX12-NEXT: v_dual_mov_b32 v59, 0 :: v_dual_mov_b32 v52, s15
+; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11
+; GFX12-NEXT: s_ashr_i32 s45, s13, 24
+; GFX12-NEXT: s_bfe_i32 s46, s13, 0x80010
+; GFX12-NEXT: s_sext_i32_i8 s13, s13
+; GFX12-NEXT: v_bfe_i32 v53, v0, 0, 8
+; GFX12-NEXT: v_dual_mov_b32 v54, s50 :: v_dual_mov_b32 v55, s49
+; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10
; GFX12-NEXT: s_ashr_i32 s43, s12, 24
; GFX12-NEXT: s_bfe_i32 s44, s12, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s12, s12
-; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11
-; GFX12-NEXT: s_ashr_i32 s39, s10, 24
+; GFX12-NEXT: v_bfe_i32 v49, v2, 0, 8
+; GFX12-NEXT: v_dual_mov_b32 v48, s14 :: v_dual_mov_b32 v51, s47
+; GFX12-NEXT: v_dual_mov_b32 v50, s48 :: v_dual_mov_b32 v47, s45
+; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9
; GFX12-NEXT: s_ashr_i32 s41, s11, 24
; GFX12-NEXT: s_bfe_i32 s42, s11, 0x80010
-; GFX12-NEXT: v_bfe_i32 v47, v2, 0, 8
-; GFX12-NEXT: v_mov_b32_e32 v46, s14
-; GFX12-NEXT: v_bfe_i32 v51, v3, 0, 8
-; GFX12-NEXT: v_mov_b32_e32 v50, s13
-; GFX12-NEXT: v_mov_b32_e32 v52, s46
; GFX12-NEXT: s_sext_i32_i8 s11, s11
-; GFX12-NEXT: v_mov_b32_e32 v48, s48
-; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10
-; GFX12-NEXT: s_ashr_i32 s35, s8, 24
-; GFX12-NEXT: s_ashr_i32 s37, s9, 24
-; GFX12-NEXT: v_bfe_i32 v41, v4, 0, 8
-; GFX12-NEXT: global_store_b128 v54, v[42:45], s[16:17] offset:240
-; GFX12-NEXT: v_dual_mov_b32 v40, s12 :: v_dual_mov_b32 v37, s37
-; GFX12-NEXT: v_dual_mov_b32 v42, s44 :: v_dual_mov_b32 v33, s35
-; GFX12-NEXT: v_mov_b32_e32 v43, s43
-; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9
+; GFX12-NEXT: v_bfe_i32 v45, v3, 0, 8
+; GFX12-NEXT: v_dual_mov_b32 v44, s13 :: v_dual_mov_b32 v43, s43
+; GFX12-NEXT: v_mov_b32_e32 v46, s46
; GFX12-NEXT: v_lshrrev_b16 v10, 8, s8
+; GFX12-NEXT: s_ashr_i32 s39, s10, 24
+; GFX12-NEXT: v_bfe_i32 v41, v4, 0, 8
+; GFX12-NEXT: v_dual_mov_b32 v40, s12 :: v_dual_mov_b32 v57, s42
+; GFX12-NEXT: v_mov_b32_e32 v42, s44
; GFX12-NEXT: v_lshrrev_b16 v11, 8, s7
; GFX12-NEXT: v_lshrrev_b16 v12, 8, s6
; GFX12-NEXT: s_bfe_i32 s40, s10, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s10, s10
; GFX12-NEXT: v_lshrrev_b16 v14, 8, s5
; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4
-; GFX12-NEXT: s_ashr_i32 s33, s7, 24
+; GFX12-NEXT: s_ashr_i32 s37, s9, 24
; GFX12-NEXT: s_bfe_i32 s38, s9, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s9, s9
-; GFX12-NEXT: global_store_b128 v54, v[46:49], s[16:17] offset:224
-; GFX12-NEXT: v_bfe_i32 v44, v6, 0, 8
-; GFX12-NEXT: v_mov_b32_e32 v45, s42
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: global_store_b128 v54, v[50:53], s[16:17] offset:208
-; GFX12-NEXT: global_store_b128 v54, v[40:43], s[16:17] offset:192
-; GFX12-NEXT: v_dual_mov_b32 v43, s11 :: v_dual_mov_b32 v46, s41
-; GFX12-NEXT: v_mov_b32_e32 v29, s33
-; GFX12-NEXT: s_ashr_i32 s28, s5, 24
-; GFX12-NEXT: s_ashr_i32 s30, s6, 24
+; GFX12-NEXT: v_bfe_i32 v56, v6, 0, 8
+; GFX12-NEXT: s_clause 0x3
+; GFX12-NEXT: global_store_b128 v59, v[52:55], s[16:17] offset:240
+; GFX12-NEXT: global_store_b128 v59, v[48:51], s[16:17] offset:224
+; GFX12-NEXT: global_store_b128 v59, v[44:47], s[16:17] offset:208
+; GFX12-NEXT: global_store_b128 v59, v[40:43], s[16:17] offset:192
+; GFX12-NEXT: v_mov_b32_e32 v41, s39
+; GFX12-NEXT: v_dual_mov_b32 v55, s11 :: v_dual_mov_b32 v58, s41
+; GFX12-NEXT: v_mov_b32_e32 v37, s37
+; GFX12-NEXT: s_ashr_i32 s33, s7, 24
+; GFX12-NEXT: s_ashr_i32 s35, s8, 24
; GFX12-NEXT: s_bfe_i32 s36, s8, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s8, s8
; GFX12-NEXT: v_bfe_i32 v39, v7, 0, 8
-; GFX12-NEXT: v_dual_mov_b32 v38, s10 :: v_dual_mov_b32 v25, s30
-; GFX12-NEXT: v_dual_mov_b32 v40, s40 :: v_dual_mov_b32 v41, s39
+; GFX12-NEXT: v_dual_mov_b32 v38, s10 :: v_dual_mov_b32 v33, s35
+; GFX12-NEXT: v_dual_mov_b32 v40, s40 :: v_dual_mov_b32 v29, s33
; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3
-; GFX12-NEXT: s_ashr_i32 s24, s3, 24
-; GFX12-NEXT: s_ashr_i32 s26, s4, 24
+; GFX12-NEXT: s_ashr_i32 s28, s5, 24
+; GFX12-NEXT: s_ashr_i32 s30, s6, 24
; GFX12-NEXT: s_bfe_i32 s31, s6, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s6, s6
; GFX12-NEXT: s_bfe_i32 s34, s7, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s7, s7
; GFX12-NEXT: v_bfe_i32 v35, v8, 0, 8
-; GFX12-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v19, s26
+; GFX12-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v25, s30
; GFX12-NEXT: v_mov_b32_e32 v36, s38
; GFX12-NEXT: v_lshrrev_b16 v9, 8, s2
; GFX12-NEXT: s_ashr_i32 s18, s0, 24
; GFX12-NEXT: s_ashr_i32 s20, s1, 24
; GFX12-NEXT: s_ashr_i32 s22, s2, 24
+; GFX12-NEXT: s_ashr_i32 s24, s3, 24
+; GFX12-NEXT: s_ashr_i32 s26, s4, 24
; GFX12-NEXT: s_bfe_i32 s29, s5, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s5, s5
; GFX12-NEXT: v_bfe_i32 v31, v10, 0, 8
-; GFX12-NEXT: v_mov_b32_e32 v30, s8
-; GFX12-NEXT: v_dual_mov_b32 v32, s36 :: v_dual_mov_b32 v7, s20
+; GFX12-NEXT: v_dual_mov_b32 v30, s8 :: v_dual_mov_b32 v19, s26
+; GFX12-NEXT: v_mov_b32_e32 v32, s36
; GFX12-NEXT: v_lshrrev_b16 v5, 8, s1
; GFX12-NEXT: s_bfe_i32 s27, s4, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s4, s4
; GFX12-NEXT: v_bfe_i32 v23, v12, 0, 8
; GFX12-NEXT: v_bfe_i32 v27, v11, 0, 8
-; GFX12-NEXT: v_dual_mov_b32 v26, s7 :: v_dual_mov_b32 v3, s18
-; GFX12-NEXT: v_mov_b32_e32 v28, s34
-; GFX12-NEXT: v_mov_b32_e32 v22, s6
+; GFX12-NEXT: v_dual_mov_b32 v26, s7 :: v_dual_mov_b32 v11, s22
+; GFX12-NEXT: v_dual_mov_b32 v28, s34 :: v_dual_mov_b32 v7, s20
+; GFX12-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v3, s18
; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0
; GFX12-NEXT: s_bfe_i32 s25, s3, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s3, s3
@@ -5154,14 +5148,13 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_bfe_i32 v21, v14, 0, 8
; GFX12-NEXT: v_mov_b32_e32 v24, s31
; GFX12-NEXT: v_dual_mov_b32 v20, s5 :: v_dual_mov_b32 v15, s24
-; GFX12-NEXT: v_mov_b32_e32 v11, s22
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v54, v[43:46], s[16:17] offset:176
-; GFX12-NEXT: global_store_b128 v54, v[38:41], s[16:17] offset:160
-; GFX12-NEXT: global_store_b128 v54, v[34:37], s[16:17] offset:144
-; GFX12-NEXT: global_store_b128 v54, v[30:33], s[16:17] offset:128
-; GFX12-NEXT: global_store_b128 v54, v[26:29], s[16:17] offset:112
-; GFX12-NEXT: global_store_b128 v54, v[22:25], s[16:17] offset:96
+; GFX12-NEXT: global_store_b128 v59, v[55:58], s[16:17] offset:176
+; GFX12-NEXT: global_store_b128 v59, v[38:41], s[16:17] offset:160
+; GFX12-NEXT: global_store_b128 v59, v[34:37], s[16:17] offset:144
+; GFX12-NEXT: global_store_b128 v59, v[30:33], s[16:17] offset:128
+; GFX12-NEXT: global_store_b128 v59, v[26:29], s[16:17] offset:112
+; GFX12-NEXT: global_store_b128 v59, v[22:25], s[16:17] offset:96
; GFX12-NEXT: v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s28
; GFX12-NEXT: s_bfe_i32 s23, s2, 0x80010
; GFX12-NEXT: s_sext_i32_i8 s2, s2
@@ -5184,12 +5177,12 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: v_mov_b32_e32 v2, s19
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v54, v[20:23], s[16:17] offset:80
-; GFX12-NEXT: global_store_b128 v54, v[16:19], s[16:17] offset:64
-; GFX12-NEXT: global_store_b128 v54, v[12:15], s[16:17] offset:48
-; GFX12-NEXT: global_store_b128 v54, v[8:11], s[16:17] offset:32
-; GFX12-NEXT: global_store_b128 v54, v[4:7], s[16:17] offset:16
-; GFX12-NEXT: global_store_b128 v54, v[0:3], s[16:17]
+; GFX12-NEXT: global_store_b128 v59, v[20:23], s[16:17] offset:80
+; GFX12-NEXT: global_store_b128 v59, v[16:19], s[16:17] offset:64
+; GFX12-NEXT: global_store_b128 v59, v[12:15], s[16:17] offset:48
+; GFX12-NEXT: global_store_b128 v59, v[8:11], s[16:17] offset:32
+; GFX12-NEXT: global_store_b128 v59, v[4:7], s[16:17] offset:16
+; GFX12-NEXT: global_store_b128 v59, v[0:3], s[16:17]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
@@ -8804,114 +8797,113 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_lshrrev_b16 v1, 8, s7
-; GFX12-NEXT: v_lshrrev_b16 v2, 8, s6
-; GFX12-NEXT: v_lshrrev_b16 v4, 8, s5
-; GFX12-NEXT: v_lshrrev_b16 v8, 8, s2
-; GFX12-NEXT: s_lshr_b32 s22, s7, 16
-; GFX12-NEXT: v_bfe_i32 v31, v1, 0, 8
-; GFX12-NEXT: s_lshr_b32 s40, s2, 24
+; GFX12-NEXT: v_lshrrev_b16 v0, 8, s7
+; GFX12-NEXT: v_lshrrev_b16 v3, 8, s5
+; GFX12-NEXT: v_lshrrev_b16 v7, 8, s2
+; GFX12-NEXT: v_lshrrev_b16 v1, 8, s6
+; GFX12-NEXT: v_lshrrev_b16 v4, 8, s4
+; GFX12-NEXT: v_lshrrev_b16 v6, 8, s1
+; GFX12-NEXT: v_lshrrev_b16 v5, 8, s3
+; GFX12-NEXT: v_lshrrev_b16 v2, 8, s0
+; GFX12-NEXT: s_lshr_b32 s20, s7, 16
+; GFX12-NEXT: s_lshr_b32 s24, s6, 24
+; GFX12-NEXT: s_lshr_b32 s26, s5, 16
+; GFX12-NEXT: s_lshr_b32 s36, s2, 16
+; GFX12-NEXT: s_lshr_b32 s38, s2, 24
+; GFX12-NEXT: v_bfe_i32 v10, v7, 0, 8
+; GFX12-NEXT: v_bfe_i32 v22, v3, 0, 8
+; GFX12-NEXT: v_bfe_i32 v30, v0, 0, 8
+; GFX12-NEXT: s_lshr_b32 s42, s0, 16
; GFX12-NEXT: s_mov_b32 s46, s7
-; GFX12-NEXT: v_lshrrev_b16 v5, 8, s4
-; GFX12-NEXT: v_lshrrev_b16 v7, 8, s1
-; GFX12-NEXT: s_lshr_b32 s24, s6, 16
-; GFX12-NEXT: s_lshr_b32 s42, s1, 16
-; GFX12-NEXT: s_ashr_i64 s[58:59], s[6:7], 56
-; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
-; GFX12-NEXT: v_lshrrev_b16 v6, 8, s3
-; GFX12-NEXT: v_lshrrev_b16 v3, 8, s0
-; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22
-; GFX12-NEXT: s_lshr_b32 s26, s6, 24
-; GFX12-NEXT: s_lshr_b32 s28, s5, 16
-; GFX12-NEXT: s_lshr_b32 s38, s2, 16
-; GFX12-NEXT: v_bfe_i32 v11, v8, 0, 8
-; GFX12-NEXT: v_bfe_i32 v23, v4, 0, 8
-; GFX12-NEXT: v_bfe_i32 v27, v2, 0, 8
-; GFX12-NEXT: v_ashrrev_i32_e32 v32, 31, v31
-; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58
-; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24
-; GFX12-NEXT: v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46
-; GFX12-NEXT: v_mov_b32_e32 v30, s47
-; GFX12-NEXT: s_lshr_b32 s44, s0, 24
; GFX12-NEXT: s_mov_b32 s48, s5
; GFX12-NEXT: s_mov_b32 s50, s3
-; GFX12-NEXT: s_lshr_b32 s30, s4, 16
-; GFX12-NEXT: s_lshr_b32 s34, s4, 24
+; GFX12-NEXT: s_lshr_b32 s22, s6, 16
+; GFX12-NEXT: s_lshr_b32 s28, s4, 16
+; GFX12-NEXT: s_lshr_b32 s30, s4, 24
+; GFX12-NEXT: s_lshr_b32 s40, s1, 16
+; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000
; GFX12-NEXT: s_ashr_i64 s[54:55], s[2:3], 56
; GFX12-NEXT: s_ashr_i64 s[56:57], s[4:5], 56
-; GFX12-NEXT: v_bfe_i32 v7, v7, 0, 8
-; GFX12-NEXT: v_bfe_i32 v19, v5, 0, 8
+; GFX12-NEXT: s_ashr_i64 s[6:7], s[6:7], 56
+; GFX12-NEXT: v_bfe_i32 v6, v6, 0, 8
+; GFX12-NEXT: v_bfe_i32 v18, v4, 0, 8
+; GFX12-NEXT: v_bfe_i32 v26, v1, 0, 8
; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX12-NEXT: s_lshr_b32 s36, s3, 16
+; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
+; GFX12-NEXT: s_lshr_b32 s34, s3, 16
+; GFX12-NEXT: s_lshr_b32 s44, s0, 24
; GFX12-NEXT: s_mov_b32 s52, s1
; GFX12-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000
; GFX12-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[50:51], 0x80000
; GFX12-NEXT: s_bfe_i64 s[4:5], s[48:49], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x80000
-; GFX12-NEXT: s_lshr_b32 s20, s0, 16
+; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v64, 0 :: v_dual_mov_b32 v33, s21
; GFX12-NEXT: s_ashr_i64 s[18:19], s[0:1], 56
-; GFX12-NEXT: v_bfe_i32 v3, v3, 0, 8
-; GFX12-NEXT: v_bfe_i32 v15, v6, 0, 8
-; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
+; GFX12-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX12-NEXT: v_bfe_i32 v14, v5, 0, 8
+; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26
-; GFX12-NEXT: v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28
-; GFX12-NEXT: v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56
-; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30
-; GFX12-NEXT: v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38
-; GFX12-NEXT: v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40
+; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
+; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v32, s20 :: v_dual_mov_b32 v35, s7
+; GFX12-NEXT: v_dual_mov_b32 v34, s6 :: v_dual_mov_b32 v37, s23
+; GFX12-NEXT: v_dual_mov_b32 v38, s24 :: v_dual_mov_b32 v41, s27
+; GFX12-NEXT: v_dual_mov_b32 v40, s26 :: v_dual_mov_b32 v43, s57
+; GFX12-NEXT: v_dual_mov_b32 v42, s56 :: v_dual_mov_b32 v45, s29
+; GFX12-NEXT: v_dual_mov_b32 v50, s54 :: v_dual_mov_b32 v53, s37
+; GFX12-NEXT: v_dual_mov_b32 v52, s36 :: v_dual_mov_b32 v55, s39
+; GFX12-NEXT: v_dual_mov_b32 v54, s38 :: v_dual_mov_b32 v57, s41
; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000
; GFX12-NEXT: s_bfe_i64 s[0:1], s[52:53], 0x80000
-; GFX12-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; GFX12-NEXT: v_ashrrev_i32_e32 v24, 31, v23
-; GFX12-NEXT: v_ashrrev_i32_e32 v28, 31, v27
-; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:240
-; GFX12-NEXT: v_mov_b32_e32 v33, s42
-; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:224
-; GFX12-NEXT: v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17
-; GFX12-NEXT: v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4
-; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v17, s14
-; GFX12-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12
-; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0
-; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
-; GFX12-NEXT: v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34
-; GFX12-NEXT: v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36
-; GFX12-NEXT: v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18
-; GFX12-NEXT: v_dual_mov_b32 v36, s19 :: v_dual_mov_b32 v29, s20
-; GFX12-NEXT: v_ashrrev_i32_e32 v8, 31, v7
-; GFX12-NEXT: v_ashrrev_i32_e32 v20, 31, v19
-; GFX12-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2
-; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10
-; GFX12-NEXT: v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54
-; GFX12-NEXT: v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6
-; GFX12-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GFX12-NEXT: s_clause 0x7
-; GFX12-NEXT: global_store_b128 v0, v[37:40], s[8:9] offset:208
-; GFX12-NEXT: global_store_b128 v0, v[25:28], s[8:9] offset:192
-; GFX12-NEXT: global_store_b128 v0, v[41:44], s[8:9] offset:176
-; GFX12-NEXT: global_store_b128 v0, v[21:24], s[8:9] offset:160
-; GFX12-NEXT: global_store_b128 v0, v[45:48], s[8:9] offset:144
-; GFX12-NEXT: global_store_b128 v0, v[17:20], s[8:9] offset:128
-; GFX12-NEXT: global_store_b128 v0, v[49:52], s[8:9] offset:112
-; GFX12-NEXT: global_store_b128 v0, v[13:16], s[8:9] offset:96
-; GFX12-NEXT: v_mov_b32_e32 v2, s11
+; GFX12-NEXT: v_ashrrev_i32_e32 v11, 31, v10
+; GFX12-NEXT: v_ashrrev_i32_e32 v23, 31, v22
+; GFX12-NEXT: v_ashrrev_i32_e32 v31, 31, v30
+; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v60, s42 :: v_dual_mov_b32 v29, s47
+; GFX12-NEXT: v_dual_mov_b32 v28, s46 :: v_dual_mov_b32 v63, s45
+; GFX12-NEXT: v_dual_mov_b32 v24, s16 :: v_dual_mov_b32 v21, s5
+; GFX12-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v17, s15
+; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v9, s13
+; GFX12-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v5, s1
+; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
+; GFX12-NEXT: v_dual_mov_b32 v36, s22 :: v_dual_mov_b32 v39, s25
+; GFX12-NEXT: v_dual_mov_b32 v44, s28 :: v_dual_mov_b32 v47, s31
+; GFX12-NEXT: v_dual_mov_b32 v46, s30 :: v_dual_mov_b32 v49, s35
+; GFX12-NEXT: v_dual_mov_b32 v56, s40 :: v_dual_mov_b32 v59, s19
+; GFX12-NEXT: v_dual_mov_b32 v58, s18 :: v_dual_mov_b32 v61, s43
+; GFX12-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GFX12-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GFX12-NEXT: v_ashrrev_i32_e32 v27, 31, v26
+; GFX12-NEXT: v_dual_mov_b32 v62, s44 :: v_dual_mov_b32 v25, s17
+; GFX12-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v13, s3
+; GFX12-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v1, s11
+; GFX12-NEXT: v_dual_mov_b32 v48, s34 :: v_dual_mov_b32 v51, s55
+; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; GFX12-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GFX12-NEXT: s_clause 0x9
+; GFX12-NEXT: global_store_b128 v64, v[32:35], s[8:9] offset:240
+; GFX12-NEXT: global_store_b128 v64, v[28:31], s[8:9] offset:224
+; GFX12-NEXT: global_store_b128 v64, v[36:39], s[8:9] offset:208
+; GFX12-NEXT: global_store_b128 v64, v[24:27], s[8:9] offset:192
+; GFX12-NEXT: global_store_b128 v64, v[40:43], s[8:9] offset:176
+; GFX12-NEXT: global_store_b128 v64, v[20:23], s[8:9] offset:160
+; GFX12-NEXT: global_store_b128 v64, v[44:47], s[8:9] offset:144
+; GFX12-NEXT: global_store_b128 v64, v[16:19], s[8:9] offset:128
+; GFX12-NEXT: global_store_b128 v64, v[48:51], s[8:9] offset:112
+; GFX12-NEXT: global_store_b128 v64, v[12:15], s[8:9] offset:96
+; GFX12-NEXT: v_mov_b32_e32 v0, s10
; GFX12-NEXT: s_clause 0x5
-; GFX12-NEXT: global_store_b128 v0, v[53:56], s[8:9] offset:80
-; GFX12-NEXT: global_store_b128 v0, v[9:12], s[8:9] offset:64
-; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:48
-; GFX12-NEXT: global_store_b128 v0, v[5:8], s[8:9] offset:32
-; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:16
-; GFX12-NEXT: global_store_b128 v0, v[1:4], s[8:9]
+; GFX12-NEXT: global_store_b128 v64, v[52:55], s[8:9] offset:80
+; GFX12-NEXT: global_store_b128 v64, v[8:11], s[8:9] offset:64
+; GFX12-NEXT: global_store_b128 v64, v[56:59], s[8:9] offset:48
+; GFX12-NEXT: global_store_b128 v64, v[4:7], s[8:9] offset:32
+; GFX12-NEXT: global_store_b128 v64, v[60:63], s[8:9] offset:16
+; GFX12-NEXT: global_store_b128 v64, v[0:3], s[8:9]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm