diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-constant-i8.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-constant-i8.ll | 430 |
1 files changed, 211 insertions, 219 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll index 889755c23bbc..9000cee7ef9d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll @@ -4093,84 +4093,80 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: v_lshrrev_b16 v0, 8, s15 ; GFX12-NEXT: v_lshrrev_b16 v9, 8, s2 ; GFX12-NEXT: v_lshrrev_b16 v5, 8, s1 -; GFX12-NEXT: s_lshr_b32 s34, s15, 24 -; GFX12-NEXT: s_and_b32 s50, s15, 0xff -; GFX12-NEXT: s_bfe_u32 s15, s15, 0x80010 -; GFX12-NEXT: s_lshr_b32 s27, s9, 24 ; GFX12-NEXT: v_lshrrev_b16 v2, 8, s14 +; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4 +; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3 ; GFX12-NEXT: v_lshrrev_b16 v3, 8, s13 -; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11 -; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9 ; GFX12-NEXT: v_lshrrev_b16 v12, 8, s6 ; GFX12-NEXT: v_lshrrev_b16 v14, 8, s5 -; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4 -; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3 -; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0 -; GFX12-NEXT: v_dual_mov_b32 v52, 0 :: v_dual_and_b32 v5, 0xffff, v5 -; GFX12-NEXT: v_dual_mov_b32 v48, s50 :: v_dual_and_b32 v9, 0xffff, v9 -; GFX12-NEXT: v_mov_b32_e32 v50, s15 -; GFX12-NEXT: s_and_b32 s44, s9, 0xff -; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_dual_mov_b32 v36, s9 :: v_dual_and_b32 v49, 0xffff, v0 -; GFX12-NEXT: v_mov_b32_e32 v51, s34 +; GFX12-NEXT: s_lshr_b32 s34, s15, 24 ; GFX12-NEXT: v_lshrrev_b16 v4, 8, s12 ; GFX12-NEXT: v_lshrrev_b16 v10, 8, s8 ; GFX12-NEXT: v_lshrrev_b16 v11, 8, s7 -; GFX12-NEXT: s_lshr_b32 s31, s13, 24 +; GFX12-NEXT: s_and_b32 s50, s15, 0xff +; GFX12-NEXT: s_bfe_u32 s15, s15, 0x80010 ; GFX12-NEXT: s_lshr_b32 s33, s14, 24 -; GFX12-NEXT: s_and_b32 s43, s8, 0xff -; GFX12-NEXT: s_and_b32 s48, s13, 0xff ; GFX12-NEXT: s_and_b32 s49, s14, 0xff ; GFX12-NEXT: s_bfe_u32 s14, s14, 0x80010 +; GFX12-NEXT: s_lshr_b32 s26, s8, 24 +; GFX12-NEXT: s_lshr_b32 s31, s13, 24 +; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11 +; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10 +; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9 +; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0 +; GFX12-NEXT: v_dual_mov_b32 v60, 0 :: v_dual_and_b32 v5, 0xffff, v5 +; GFX12-NEXT: v_dual_mov_b32 v56, s50 :: v_dual_and_b32 v9, 0xffff, v9 +; GFX12-NEXT: v_mov_b32_e32 v58, s15 +; GFX12-NEXT: s_and_b32 s43, s8, 0xff +; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010 +; GFX12-NEXT: s_and_b32 s48, s13, 0xff ; GFX12-NEXT: s_bfe_u32 s13, s13, 0x80010 +; GFX12-NEXT: v_dual_mov_b32 v30, s43 :: v_dual_and_b32 v57, 0xffff, v0 +; GFX12-NEXT: v_dual_mov_b32 v59, s34 :: v_dual_mov_b32 v32, s8 +; GFX12-NEXT: s_lshr_b32 s27, s9, 24 ; GFX12-NEXT: s_lshr_b32 s30, s12, 24 +; GFX12-NEXT: v_dual_mov_b32 v52, s49 :: v_dual_and_b32 v13, 0xffff, v13 +; GFX12-NEXT: v_dual_mov_b32 v54, s14 :: v_dual_and_b32 v17, 0xffff, v15 +; GFX12-NEXT: s_and_b32 s42, s7, 0xff +; GFX12-NEXT: s_and_b32 s44, s9, 0xff +; GFX12-NEXT: s_bfe_u32 s9, s9, 0x80010 ; GFX12-NEXT: s_and_b32 s47, s12, 0xff ; GFX12-NEXT: s_bfe_u32 s12, s12, 0x80010 -; GFX12-NEXT: s_lshr_b32 s26, s8, 24 -; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX12-NEXT: v_dual_mov_b32 v44, s49 :: v_dual_and_b32 v13, 0xffff, v13 -; GFX12-NEXT: v_dual_mov_b32 v46, s14 :: v_dual_and_b32 v17, 0xffff, v15 -; GFX12-NEXT: v_and_b32_e32 v21, 0xffff, v14 -; GFX12-NEXT: v_and_b32_e32 v23, 0xffff, v12 -; GFX12-NEXT: s_and_b32 s42, s7, 0xff -; GFX12-NEXT: s_bfe_u32 s8, s8, 0x80010 -; GFX12-NEXT: v_dual_mov_b32 v34, s44 :: v_dual_and_b32 v45, 0xffff, v2 -; GFX12-NEXT: v_dual_mov_b32 v47, s33 :: v_dual_mov_b32 v30, s43 -; GFX12-NEXT: global_store_b128 v52, v[48:51], s[16:17] offset:240 -; GFX12-NEXT: v_dual_mov_b32 v32, s8 :: v_dual_and_b32 v49, 0xffff, v3 -; GFX12-NEXT: v_mov_b32_e32 v48, s48 -; GFX12-NEXT: v_dual_mov_b32 v50, s13 :: v_dual_mov_b32 v51, s31 -; GFX12-NEXT: v_mov_b32_e32 v26, s42 +; GFX12-NEXT: v_dual_mov_b32 v36, s9 :: v_dual_and_b32 v53, 0xffff, v2 +; GFX12-NEXT: v_dual_mov_b32 v55, s33 :: v_dual_mov_b32 v26, s42 ; GFX12-NEXT: s_lshr_b32 s25, s7, 24 -; GFX12-NEXT: s_lshr_b32 s28, s10, 24 -; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10 -; GFX12-NEXT: v_dual_mov_b32 v40, s47 :: v_dual_and_b32 v27, 0xffff, v11 +; GFX12-NEXT: v_dual_mov_b32 v48, s48 :: v_dual_and_b32 v21, 0xffff, v14 +; GFX12-NEXT: v_dual_mov_b32 v50, s13 :: v_dual_and_b32 v23, 0xffff, v12 ; GFX12-NEXT: s_bfe_u32 s7, s7, 0x80010 -; GFX12-NEXT: v_dual_mov_b32 v42, s12 :: v_dual_and_b32 v31, 0xffff, v10 +; GFX12-NEXT: v_dual_mov_b32 v34, s44 :: v_dual_and_b32 v49, 0xffff, v3 +; GFX12-NEXT: v_dual_mov_b32 v51, s31 :: v_dual_mov_b32 v28, s7 +; GFX12-NEXT: s_lshr_b32 s28, s10, 24 +; GFX12-NEXT: s_lshr_b32 s29, s11, 24 +; GFX12-NEXT: s_and_b32 s41, s6, 0xff +; GFX12-NEXT: v_dual_mov_b32 v44, s47 :: v_dual_and_b32 v27, 0xffff, v11 +; GFX12-NEXT: v_dual_mov_b32 v46, s12 :: v_dual_and_b32 v31, 0xffff, v10 ; GFX12-NEXT: s_and_b32 s45, s10, 0xff ; GFX12-NEXT: s_bfe_u32 s10, s10, 0x80010 -; GFX12-NEXT: v_and_b32_e32 v41, 0xffff, v4 -; GFX12-NEXT: v_dual_mov_b32 v43, s30 :: v_dual_mov_b32 v28, s7 -; GFX12-NEXT: s_lshr_b32 s29, s11, 24 ; GFX12-NEXT: s_and_b32 s46, s11, 0xff +; GFX12-NEXT: v_dual_mov_b32 v40, s10 :: v_dual_and_b32 v45, 0xffff, v4 +; GFX12-NEXT: v_dual_mov_b32 v47, s30 :: v_dual_mov_b32 v22, s41 ; GFX12-NEXT: s_bfe_u32 s11, s11, 0x80010 ; GFX12-NEXT: s_lshr_b32 s24, s6, 24 -; GFX12-NEXT: s_and_b32 s41, s6, 0xff +; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-NEXT: s_bfe_u32 s6, s6, 0x80010 -; GFX12-NEXT: v_and_b32_e32 v35, 0xffff, v8 -; GFX12-NEXT: global_store_b128 v52, v[44:47], s[16:17] offset:224 -; GFX12-NEXT: v_mov_b32_e32 v46, s29 -; GFX12-NEXT: v_and_b32_e32 v44, 0xffff, v6 -; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: global_store_b128 v52, v[48:51], s[16:17] offset:208 -; GFX12-NEXT: global_store_b128 v52, v[40:43], s[16:17] offset:192 -; GFX12-NEXT: v_dual_mov_b32 v43, s46 :: v_dual_mov_b32 v22, s41 -; GFX12-NEXT: v_dual_mov_b32 v45, s11 :: v_dual_mov_b32 v24, s6 +; GFX12-NEXT: v_dual_mov_b32 v42, s46 :: v_dual_and_b32 v35, 0xffff, v8 +; GFX12-NEXT: v_and_b32_e32 v39, 0xffff, v7 +; GFX12-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_and_b32 v43, 0xffff, v6 +; GFX12-NEXT: s_clause 0x3 +; GFX12-NEXT: global_store_b128 v60, v[56:59], s[16:17] offset:240 +; GFX12-NEXT: global_store_b128 v60, v[52:55], s[16:17] offset:224 +; GFX12-NEXT: global_store_b128 v60, v[48:51], s[16:17] offset:208 +; GFX12-NEXT: global_store_b128 v60, v[44:47], s[16:17] offset:192 +; GFX12-NEXT: v_dual_mov_b32 v44, s11 :: v_dual_mov_b32 v45, s29 +; GFX12-NEXT: v_mov_b32_e32 v24, s6 ; GFX12-NEXT: s_and_b32 s40, s5, 0xff -; GFX12-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_and_b32 v39, 0xffff, v7 -; GFX12-NEXT: v_dual_mov_b32 v40, s10 :: v_dual_mov_b32 v41, s28 -; GFX12-NEXT: v_mov_b32_e32 v20, s40 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v41, s28 :: v_dual_mov_b32 v20, s40 ; GFX12-NEXT: s_lshr_b32 s23, s5, 24 ; GFX12-NEXT: s_bfe_u32 s5, s5, 0x80010 ; GFX12-NEXT: v_mov_b32_e32 v37, s27 @@ -4184,12 +4180,12 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: s_bfe_u32 s3, s3, 0x80010 ; GFX12-NEXT: v_dual_mov_b32 v25, s24 :: v_dual_mov_b32 v12, s38 ; GFX12-NEXT: s_clause 0x5 -; GFX12-NEXT: global_store_b128 v52, v[43:46], s[16:17] offset:176 -; GFX12-NEXT: global_store_b128 v52, v[38:41], s[16:17] offset:160 -; GFX12-NEXT: global_store_b128 v52, v[34:37], s[16:17] offset:144 -; GFX12-NEXT: global_store_b128 v52, v[30:33], s[16:17] offset:128 -; GFX12-NEXT: global_store_b128 v52, v[26:29], s[16:17] offset:112 -; GFX12-NEXT: global_store_b128 v52, v[22:25], s[16:17] offset:96 +; GFX12-NEXT: global_store_b128 v60, v[42:45], s[16:17] offset:176 +; GFX12-NEXT: global_store_b128 v60, v[38:41], s[16:17] offset:160 +; GFX12-NEXT: global_store_b128 v60, v[34:37], s[16:17] offset:144 +; GFX12-NEXT: global_store_b128 v60, v[30:33], s[16:17] offset:128 +; GFX12-NEXT: global_store_b128 v60, v[26:29], s[16:17] offset:112 +; GFX12-NEXT: global_store_b128 v60, v[22:25], s[16:17] offset:96 ; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v23, s23 ; GFX12-NEXT: v_mov_b32_e32 v14, s3 ; GFX12-NEXT: s_lshr_b32 s20, s2, 24 @@ -4208,12 +4204,12 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: v_dual_mov_b32 v0, s35 :: v_dual_mov_b32 v3, s18 ; GFX12-NEXT: v_mov_b32_e32 v2, s0 ; GFX12-NEXT: s_clause 0x5 -; GFX12-NEXT: global_store_b128 v52, v[20:23], s[16:17] offset:80 -; GFX12-NEXT: global_store_b128 v52, v[16:19], s[16:17] offset:64 -; GFX12-NEXT: global_store_b128 v52, v[12:15], s[16:17] offset:48 -; GFX12-NEXT: global_store_b128 v52, v[8:11], s[16:17] offset:32 -; GFX12-NEXT: global_store_b128 v52, v[4:7], s[16:17] offset:16 -; GFX12-NEXT: global_store_b128 v52, v[0:3], s[16:17] +; GFX12-NEXT: global_store_b128 v60, v[20:23], s[16:17] offset:80 +; GFX12-NEXT: global_store_b128 v60, v[16:19], s[16:17] offset:64 +; GFX12-NEXT: global_store_b128 v60, v[12:15], s[16:17] offset:48 +; GFX12-NEXT: global_store_b128 v60, v[8:11], s[16:17] offset:32 +; GFX12-NEXT: global_store_b128 v60, v[4:7], s[16:17] offset:16 +; GFX12-NEXT: global_store_b128 v60, v[0:3], s[16:17] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm @@ -5056,97 +5052,95 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_lshrrev_b16 v0, 8, s15 -; GFX12-NEXT: s_ashr_i32 s49, s15, 24 -; GFX12-NEXT: s_bfe_i32 s50, s15, 0x80010 -; GFX12-NEXT: s_sext_i32_i8 s15, s15 ; GFX12-NEXT: v_lshrrev_b16 v2, 8, s14 ; GFX12-NEXT: v_lshrrev_b16 v3, 8, s13 -; GFX12-NEXT: v_dual_mov_b32 v54, 0 :: v_dual_mov_b32 v45, s49 ; GFX12-NEXT: v_lshrrev_b16 v4, 8, s12 -; GFX12-NEXT: s_ashr_i32 s45, s13, 24 +; GFX12-NEXT: s_ashr_i32 s49, s15, 24 +; GFX12-NEXT: s_bfe_i32 s50, s15, 0x80010 +; GFX12-NEXT: s_sext_i32_i8 s15, s15 ; GFX12-NEXT: s_ashr_i32 s47, s14, 24 -; GFX12-NEXT: v_bfe_i32 v43, v0, 0, 8 -; GFX12-NEXT: v_dual_mov_b32 v42, s15 :: v_dual_mov_b32 v49, s47 -; GFX12-NEXT: v_dual_mov_b32 v44, s50 :: v_dual_mov_b32 v53, s45 -; GFX12-NEXT: s_bfe_i32 s46, s13, 0x80010 -; GFX12-NEXT: s_sext_i32_i8 s13, s13 ; GFX12-NEXT: s_bfe_i32 s48, s14, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s14, s14 +; GFX12-NEXT: v_dual_mov_b32 v59, 0 :: v_dual_mov_b32 v52, s15 +; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11 +; GFX12-NEXT: s_ashr_i32 s45, s13, 24 +; GFX12-NEXT: s_bfe_i32 s46, s13, 0x80010 +; GFX12-NEXT: s_sext_i32_i8 s13, s13 +; GFX12-NEXT: v_bfe_i32 v53, v0, 0, 8 +; GFX12-NEXT: v_dual_mov_b32 v54, s50 :: v_dual_mov_b32 v55, s49 +; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10 ; GFX12-NEXT: s_ashr_i32 s43, s12, 24 ; GFX12-NEXT: s_bfe_i32 s44, s12, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s12, s12 -; GFX12-NEXT: v_lshrrev_b16 v6, 8, s11 -; GFX12-NEXT: s_ashr_i32 s39, s10, 24 +; GFX12-NEXT: v_bfe_i32 v49, v2, 0, 8 +; GFX12-NEXT: v_dual_mov_b32 v48, s14 :: v_dual_mov_b32 v51, s47 +; GFX12-NEXT: v_dual_mov_b32 v50, s48 :: v_dual_mov_b32 v47, s45 +; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9 ; GFX12-NEXT: s_ashr_i32 s41, s11, 24 ; GFX12-NEXT: s_bfe_i32 s42, s11, 0x80010 -; GFX12-NEXT: v_bfe_i32 v47, v2, 0, 8 -; GFX12-NEXT: v_mov_b32_e32 v46, s14 -; GFX12-NEXT: v_bfe_i32 v51, v3, 0, 8 -; GFX12-NEXT: v_mov_b32_e32 v50, s13 -; GFX12-NEXT: v_mov_b32_e32 v52, s46 ; GFX12-NEXT: s_sext_i32_i8 s11, s11 -; GFX12-NEXT: v_mov_b32_e32 v48, s48 -; GFX12-NEXT: v_lshrrev_b16 v7, 8, s10 -; GFX12-NEXT: s_ashr_i32 s35, s8, 24 -; GFX12-NEXT: s_ashr_i32 s37, s9, 24 -; GFX12-NEXT: v_bfe_i32 v41, v4, 0, 8 -; GFX12-NEXT: global_store_b128 v54, v[42:45], s[16:17] offset:240 -; GFX12-NEXT: v_dual_mov_b32 v40, s12 :: v_dual_mov_b32 v37, s37 -; GFX12-NEXT: v_dual_mov_b32 v42, s44 :: v_dual_mov_b32 v33, s35 -; GFX12-NEXT: v_mov_b32_e32 v43, s43 -; GFX12-NEXT: v_lshrrev_b16 v8, 8, s9 +; GFX12-NEXT: v_bfe_i32 v45, v3, 0, 8 +; GFX12-NEXT: v_dual_mov_b32 v44, s13 :: v_dual_mov_b32 v43, s43 +; GFX12-NEXT: v_mov_b32_e32 v46, s46 ; GFX12-NEXT: v_lshrrev_b16 v10, 8, s8 +; GFX12-NEXT: s_ashr_i32 s39, s10, 24 +; GFX12-NEXT: v_bfe_i32 v41, v4, 0, 8 +; GFX12-NEXT: v_dual_mov_b32 v40, s12 :: v_dual_mov_b32 v57, s42 +; GFX12-NEXT: v_mov_b32_e32 v42, s44 ; GFX12-NEXT: v_lshrrev_b16 v11, 8, s7 ; GFX12-NEXT: v_lshrrev_b16 v12, 8, s6 ; GFX12-NEXT: s_bfe_i32 s40, s10, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s10, s10 ; GFX12-NEXT: v_lshrrev_b16 v14, 8, s5 ; GFX12-NEXT: v_lshrrev_b16 v15, 8, s4 -; GFX12-NEXT: s_ashr_i32 s33, s7, 24 +; GFX12-NEXT: s_ashr_i32 s37, s9, 24 ; GFX12-NEXT: s_bfe_i32 s38, s9, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s9, s9 -; GFX12-NEXT: global_store_b128 v54, v[46:49], s[16:17] offset:224 -; GFX12-NEXT: v_bfe_i32 v44, v6, 0, 8 -; GFX12-NEXT: v_mov_b32_e32 v45, s42 -; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: global_store_b128 v54, v[50:53], s[16:17] offset:208 -; GFX12-NEXT: global_store_b128 v54, v[40:43], s[16:17] offset:192 -; GFX12-NEXT: v_dual_mov_b32 v43, s11 :: v_dual_mov_b32 v46, s41 -; GFX12-NEXT: v_mov_b32_e32 v29, s33 -; GFX12-NEXT: s_ashr_i32 s28, s5, 24 -; GFX12-NEXT: s_ashr_i32 s30, s6, 24 +; GFX12-NEXT: v_bfe_i32 v56, v6, 0, 8 +; GFX12-NEXT: s_clause 0x3 +; GFX12-NEXT: global_store_b128 v59, v[52:55], s[16:17] offset:240 +; GFX12-NEXT: global_store_b128 v59, v[48:51], s[16:17] offset:224 +; GFX12-NEXT: global_store_b128 v59, v[44:47], s[16:17] offset:208 +; GFX12-NEXT: global_store_b128 v59, v[40:43], s[16:17] offset:192 +; GFX12-NEXT: v_mov_b32_e32 v41, s39 +; GFX12-NEXT: v_dual_mov_b32 v55, s11 :: v_dual_mov_b32 v58, s41 +; GFX12-NEXT: v_mov_b32_e32 v37, s37 +; GFX12-NEXT: s_ashr_i32 s33, s7, 24 +; GFX12-NEXT: s_ashr_i32 s35, s8, 24 ; GFX12-NEXT: s_bfe_i32 s36, s8, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s8, s8 ; GFX12-NEXT: v_bfe_i32 v39, v7, 0, 8 -; GFX12-NEXT: v_dual_mov_b32 v38, s10 :: v_dual_mov_b32 v25, s30 -; GFX12-NEXT: v_dual_mov_b32 v40, s40 :: v_dual_mov_b32 v41, s39 +; GFX12-NEXT: v_dual_mov_b32 v38, s10 :: v_dual_mov_b32 v33, s35 +; GFX12-NEXT: v_dual_mov_b32 v40, s40 :: v_dual_mov_b32 v29, s33 ; GFX12-NEXT: v_lshrrev_b16 v13, 8, s3 -; GFX12-NEXT: s_ashr_i32 s24, s3, 24 -; GFX12-NEXT: s_ashr_i32 s26, s4, 24 +; GFX12-NEXT: s_ashr_i32 s28, s5, 24 +; GFX12-NEXT: s_ashr_i32 s30, s6, 24 ; GFX12-NEXT: s_bfe_i32 s31, s6, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s6, s6 ; GFX12-NEXT: s_bfe_i32 s34, s7, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s7, s7 ; GFX12-NEXT: v_bfe_i32 v35, v8, 0, 8 -; GFX12-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v19, s26 +; GFX12-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v25, s30 ; GFX12-NEXT: v_mov_b32_e32 v36, s38 ; GFX12-NEXT: v_lshrrev_b16 v9, 8, s2 ; GFX12-NEXT: s_ashr_i32 s18, s0, 24 ; GFX12-NEXT: s_ashr_i32 s20, s1, 24 ; GFX12-NEXT: s_ashr_i32 s22, s2, 24 +; GFX12-NEXT: s_ashr_i32 s24, s3, 24 +; GFX12-NEXT: s_ashr_i32 s26, s4, 24 ; GFX12-NEXT: s_bfe_i32 s29, s5, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s5, s5 ; GFX12-NEXT: v_bfe_i32 v31, v10, 0, 8 -; GFX12-NEXT: v_mov_b32_e32 v30, s8 -; GFX12-NEXT: v_dual_mov_b32 v32, s36 :: v_dual_mov_b32 v7, s20 +; GFX12-NEXT: v_dual_mov_b32 v30, s8 :: v_dual_mov_b32 v19, s26 +; GFX12-NEXT: v_mov_b32_e32 v32, s36 ; GFX12-NEXT: v_lshrrev_b16 v5, 8, s1 ; GFX12-NEXT: s_bfe_i32 s27, s4, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s4, s4 ; GFX12-NEXT: v_bfe_i32 v23, v12, 0, 8 ; GFX12-NEXT: v_bfe_i32 v27, v11, 0, 8 -; GFX12-NEXT: v_dual_mov_b32 v26, s7 :: v_dual_mov_b32 v3, s18 -; GFX12-NEXT: v_mov_b32_e32 v28, s34 -; GFX12-NEXT: v_mov_b32_e32 v22, s6 +; GFX12-NEXT: v_dual_mov_b32 v26, s7 :: v_dual_mov_b32 v11, s22 +; GFX12-NEXT: v_dual_mov_b32 v28, s34 :: v_dual_mov_b32 v7, s20 +; GFX12-NEXT: v_dual_mov_b32 v22, s6 :: v_dual_mov_b32 v3, s18 ; GFX12-NEXT: v_lshrrev_b16 v1, 8, s0 ; GFX12-NEXT: s_bfe_i32 s25, s3, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s3, s3 @@ -5154,14 +5148,13 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: v_bfe_i32 v21, v14, 0, 8 ; GFX12-NEXT: v_mov_b32_e32 v24, s31 ; GFX12-NEXT: v_dual_mov_b32 v20, s5 :: v_dual_mov_b32 v15, s24 -; GFX12-NEXT: v_mov_b32_e32 v11, s22 ; GFX12-NEXT: s_clause 0x5 -; GFX12-NEXT: global_store_b128 v54, v[43:46], s[16:17] offset:176 -; GFX12-NEXT: global_store_b128 v54, v[38:41], s[16:17] offset:160 -; GFX12-NEXT: global_store_b128 v54, v[34:37], s[16:17] offset:144 -; GFX12-NEXT: global_store_b128 v54, v[30:33], s[16:17] offset:128 -; GFX12-NEXT: global_store_b128 v54, v[26:29], s[16:17] offset:112 -; GFX12-NEXT: global_store_b128 v54, v[22:25], s[16:17] offset:96 +; GFX12-NEXT: global_store_b128 v59, v[55:58], s[16:17] offset:176 +; GFX12-NEXT: global_store_b128 v59, v[38:41], s[16:17] offset:160 +; GFX12-NEXT: global_store_b128 v59, v[34:37], s[16:17] offset:144 +; GFX12-NEXT: global_store_b128 v59, v[30:33], s[16:17] offset:128 +; GFX12-NEXT: global_store_b128 v59, v[26:29], s[16:17] offset:112 +; GFX12-NEXT: global_store_b128 v59, v[22:25], s[16:17] offset:96 ; GFX12-NEXT: v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s28 ; GFX12-NEXT: s_bfe_i32 s23, s2, 0x80010 ; GFX12-NEXT: s_sext_i32_i8 s2, s2 @@ -5184,12 +5177,12 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o ; GFX12-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-NEXT: v_mov_b32_e32 v2, s19 ; GFX12-NEXT: s_clause 0x5 -; GFX12-NEXT: global_store_b128 v54, v[20:23], s[16:17] offset:80 -; GFX12-NEXT: global_store_b128 v54, v[16:19], s[16:17] offset:64 -; GFX12-NEXT: global_store_b128 v54, v[12:15], s[16:17] offset:48 -; GFX12-NEXT: global_store_b128 v54, v[8:11], s[16:17] offset:32 -; GFX12-NEXT: global_store_b128 v54, v[4:7], s[16:17] offset:16 -; GFX12-NEXT: global_store_b128 v54, v[0:3], s[16:17] +; GFX12-NEXT: global_store_b128 v59, v[20:23], s[16:17] offset:80 +; GFX12-NEXT: global_store_b128 v59, v[16:19], s[16:17] offset:64 +; GFX12-NEXT: global_store_b128 v59, v[12:15], s[16:17] offset:48 +; GFX12-NEXT: global_store_b128 v59, v[8:11], s[16:17] offset:32 +; GFX12-NEXT: global_store_b128 v59, v[4:7], s[16:17] offset:16 +; GFX12-NEXT: global_store_b128 v59, v[0:3], s[16:17] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm @@ -8804,114 +8797,113 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_lshrrev_b16 v1, 8, s7 -; GFX12-NEXT: v_lshrrev_b16 v2, 8, s6 -; GFX12-NEXT: v_lshrrev_b16 v4, 8, s5 -; GFX12-NEXT: v_lshrrev_b16 v8, 8, s2 -; GFX12-NEXT: s_lshr_b32 s22, s7, 16 -; GFX12-NEXT: v_bfe_i32 v31, v1, 0, 8 -; GFX12-NEXT: s_lshr_b32 s40, s2, 24 +; GFX12-NEXT: v_lshrrev_b16 v0, 8, s7 +; GFX12-NEXT: v_lshrrev_b16 v3, 8, s5 +; GFX12-NEXT: v_lshrrev_b16 v7, 8, s2 +; GFX12-NEXT: v_lshrrev_b16 v1, 8, s6 +; GFX12-NEXT: v_lshrrev_b16 v4, 8, s4 +; GFX12-NEXT: v_lshrrev_b16 v6, 8, s1 +; GFX12-NEXT: v_lshrrev_b16 v5, 8, s3 +; GFX12-NEXT: v_lshrrev_b16 v2, 8, s0 +; GFX12-NEXT: s_lshr_b32 s20, s7, 16 +; GFX12-NEXT: s_lshr_b32 s24, s6, 24 +; GFX12-NEXT: s_lshr_b32 s26, s5, 16 +; GFX12-NEXT: s_lshr_b32 s36, s2, 16 +; GFX12-NEXT: s_lshr_b32 s38, s2, 24 +; GFX12-NEXT: v_bfe_i32 v10, v7, 0, 8 +; GFX12-NEXT: v_bfe_i32 v22, v3, 0, 8 +; GFX12-NEXT: v_bfe_i32 v30, v0, 0, 8 +; GFX12-NEXT: s_lshr_b32 s42, s0, 16 ; GFX12-NEXT: s_mov_b32 s46, s7 -; GFX12-NEXT: v_lshrrev_b16 v5, 8, s4 -; GFX12-NEXT: v_lshrrev_b16 v7, 8, s1 -; GFX12-NEXT: s_lshr_b32 s24, s6, 16 -; GFX12-NEXT: s_lshr_b32 s42, s1, 16 -; GFX12-NEXT: s_ashr_i64 s[58:59], s[6:7], 56 -; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 -; GFX12-NEXT: v_lshrrev_b16 v6, 8, s3 -; GFX12-NEXT: v_lshrrev_b16 v3, 8, s0 -; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22 -; GFX12-NEXT: s_lshr_b32 s26, s6, 24 -; GFX12-NEXT: s_lshr_b32 s28, s5, 16 -; GFX12-NEXT: s_lshr_b32 s38, s2, 16 -; GFX12-NEXT: v_bfe_i32 v11, v8, 0, 8 -; GFX12-NEXT: v_bfe_i32 v23, v4, 0, 8 -; GFX12-NEXT: v_bfe_i32 v27, v2, 0, 8 -; GFX12-NEXT: v_ashrrev_i32_e32 v32, 31, v31 -; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58 -; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24 -; GFX12-NEXT: v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46 -; GFX12-NEXT: v_mov_b32_e32 v30, s47 -; GFX12-NEXT: s_lshr_b32 s44, s0, 24 ; GFX12-NEXT: s_mov_b32 s48, s5 ; GFX12-NEXT: s_mov_b32 s50, s3 -; GFX12-NEXT: s_lshr_b32 s30, s4, 16 -; GFX12-NEXT: s_lshr_b32 s34, s4, 24 +; GFX12-NEXT: s_lshr_b32 s22, s6, 16 +; GFX12-NEXT: s_lshr_b32 s28, s4, 16 +; GFX12-NEXT: s_lshr_b32 s30, s4, 24 +; GFX12-NEXT: s_lshr_b32 s40, s1, 16 +; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000 ; GFX12-NEXT: s_ashr_i64 s[54:55], s[2:3], 56 ; GFX12-NEXT: s_ashr_i64 s[56:57], s[4:5], 56 -; GFX12-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-NEXT: v_bfe_i32 v19, v5, 0, 8 +; GFX12-NEXT: s_ashr_i64 s[6:7], s[6:7], 56 +; GFX12-NEXT: v_bfe_i32 v6, v6, 0, 8 +; GFX12-NEXT: v_bfe_i32 v18, v4, 0, 8 +; GFX12-NEXT: v_bfe_i32 v26, v1, 0, 8 ; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 -; GFX12-NEXT: s_lshr_b32 s36, s3, 16 +; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 +; GFX12-NEXT: s_lshr_b32 s34, s3, 16 +; GFX12-NEXT: s_lshr_b32 s44, s0, 24 ; GFX12-NEXT: s_mov_b32 s52, s1 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[50:51], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[4:5], s[48:49], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x80000 -; GFX12-NEXT: s_lshr_b32 s20, s0, 16 +; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v64, 0 :: v_dual_mov_b32 v33, s21 ; GFX12-NEXT: s_ashr_i64 s[18:19], s[0:1], 56 -; GFX12-NEXT: v_bfe_i32 v3, v3, 0, 8 -; GFX12-NEXT: v_bfe_i32 v15, v6, 0, 8 -; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000 +; GFX12-NEXT: v_bfe_i32 v2, v2, 0, 8 +; GFX12-NEXT: v_bfe_i32 v14, v5, 0, 8 +; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26 -; GFX12-NEXT: v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28 -; GFX12-NEXT: v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56 -; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30 -; GFX12-NEXT: v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38 -; GFX12-NEXT: v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40 +; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v32, s20 :: v_dual_mov_b32 v35, s7 +; GFX12-NEXT: v_dual_mov_b32 v34, s6 :: v_dual_mov_b32 v37, s23 +; GFX12-NEXT: v_dual_mov_b32 v38, s24 :: v_dual_mov_b32 v41, s27 +; GFX12-NEXT: v_dual_mov_b32 v40, s26 :: v_dual_mov_b32 v43, s57 +; GFX12-NEXT: v_dual_mov_b32 v42, s56 :: v_dual_mov_b32 v45, s29 +; GFX12-NEXT: v_dual_mov_b32 v50, s54 :: v_dual_mov_b32 v53, s37 +; GFX12-NEXT: v_dual_mov_b32 v52, s36 :: v_dual_mov_b32 v55, s39 +; GFX12-NEXT: v_dual_mov_b32 v54, s38 :: v_dual_mov_b32 v57, s41 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[0:1], s[52:53], 0x80000 -; GFX12-NEXT: v_ashrrev_i32_e32 v12, 31, v11 -; GFX12-NEXT: v_ashrrev_i32_e32 v24, 31, v23 -; GFX12-NEXT: v_ashrrev_i32_e32 v28, 31, v27 -; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:240 -; GFX12-NEXT: v_mov_b32_e32 v33, s42 -; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:224 -; GFX12-NEXT: v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17 -; GFX12-NEXT: v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4 -; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v17, s14 -; GFX12-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12 -; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0 -; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34 -; GFX12-NEXT: v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36 -; GFX12-NEXT: v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18 -; GFX12-NEXT: v_dual_mov_b32 v36, s19 :: v_dual_mov_b32 v29, s20 -; GFX12-NEXT: v_ashrrev_i32_e32 v8, 31, v7 -; GFX12-NEXT: v_ashrrev_i32_e32 v20, 31, v19 -; GFX12-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2 -; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10 -; GFX12-NEXT: v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54 -; GFX12-NEXT: v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6 -; GFX12-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15 -; GFX12-NEXT: s_clause 0x7 -; GFX12-NEXT: global_store_b128 v0, v[37:40], s[8:9] offset:208 -; GFX12-NEXT: global_store_b128 v0, v[25:28], s[8:9] offset:192 -; GFX12-NEXT: global_store_b128 v0, v[41:44], s[8:9] offset:176 -; GFX12-NEXT: global_store_b128 v0, v[21:24], s[8:9] offset:160 -; GFX12-NEXT: global_store_b128 v0, v[45:48], s[8:9] offset:144 -; GFX12-NEXT: global_store_b128 v0, v[17:20], s[8:9] offset:128 -; GFX12-NEXT: global_store_b128 v0, v[49:52], s[8:9] offset:112 -; GFX12-NEXT: global_store_b128 v0, v[13:16], s[8:9] offset:96 -; GFX12-NEXT: v_mov_b32_e32 v2, s11 +; GFX12-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX12-NEXT: v_ashrrev_i32_e32 v23, 31, v22 +; GFX12-NEXT: v_ashrrev_i32_e32 v31, 31, v30 +; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v60, s42 :: v_dual_mov_b32 v29, s47 +; GFX12-NEXT: v_dual_mov_b32 v28, s46 :: v_dual_mov_b32 v63, s45 +; GFX12-NEXT: v_dual_mov_b32 v24, s16 :: v_dual_mov_b32 v21, s5 +; GFX12-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v17, s15 +; GFX12-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v9, s13 +; GFX12-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v5, s1 +; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v36, s22 :: v_dual_mov_b32 v39, s25 +; GFX12-NEXT: v_dual_mov_b32 v44, s28 :: v_dual_mov_b32 v47, s31 +; GFX12-NEXT: v_dual_mov_b32 v46, s30 :: v_dual_mov_b32 v49, s35 +; GFX12-NEXT: v_dual_mov_b32 v56, s40 :: v_dual_mov_b32 v59, s19 +; GFX12-NEXT: v_dual_mov_b32 v58, s18 :: v_dual_mov_b32 v61, s43 +; GFX12-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX12-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; GFX12-NEXT: v_ashrrev_i32_e32 v27, 31, v26 +; GFX12-NEXT: v_dual_mov_b32 v62, s44 :: v_dual_mov_b32 v25, s17 +; GFX12-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v13, s3 +; GFX12-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v1, s11 +; GFX12-NEXT: v_dual_mov_b32 v48, s34 :: v_dual_mov_b32 v51, s55 +; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX12-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX12-NEXT: s_clause 0x9 +; GFX12-NEXT: global_store_b128 v64, v[32:35], s[8:9] offset:240 +; GFX12-NEXT: global_store_b128 v64, v[28:31], s[8:9] offset:224 +; GFX12-NEXT: global_store_b128 v64, v[36:39], s[8:9] offset:208 +; GFX12-NEXT: global_store_b128 v64, v[24:27], s[8:9] offset:192 +; GFX12-NEXT: global_store_b128 v64, v[40:43], s[8:9] offset:176 +; GFX12-NEXT: global_store_b128 v64, v[20:23], s[8:9] offset:160 +; GFX12-NEXT: global_store_b128 v64, v[44:47], s[8:9] offset:144 +; GFX12-NEXT: global_store_b128 v64, v[16:19], s[8:9] offset:128 +; GFX12-NEXT: global_store_b128 v64, v[48:51], s[8:9] offset:112 +; GFX12-NEXT: global_store_b128 v64, v[12:15], s[8:9] offset:96 +; GFX12-NEXT: v_mov_b32_e32 v0, s10 ; GFX12-NEXT: s_clause 0x5 -; GFX12-NEXT: global_store_b128 v0, v[53:56], s[8:9] offset:80 -; GFX12-NEXT: global_store_b128 v0, v[9:12], s[8:9] offset:64 -; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:48 -; GFX12-NEXT: global_store_b128 v0, v[5:8], s[8:9] offset:32 -; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:16 -; GFX12-NEXT: global_store_b128 v0, v[1:4], s[8:9] +; GFX12-NEXT: global_store_b128 v64, v[52:55], s[8:9] offset:80 +; GFX12-NEXT: global_store_b128 v64, v[8:11], s[8:9] offset:64 +; GFX12-NEXT: global_store_b128 v64, v[56:59], s[8:9] offset:48 +; GFX12-NEXT: global_store_b128 v64, v[4:7], s[8:9] offset:32 +; GFX12-NEXT: global_store_b128 v64, v[60:63], s[8:9] offset:16 +; GFX12-NEXT: global_store_b128 v64, v[0:3], s[8:9] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm |
