summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/function-args.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/function-args.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/function-args.ll498
1 files changed, 286 insertions, 212 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index 81b8b3618074..a901d7f97eb3 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -3380,42 +3380,117 @@ define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i
}
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
-; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64:
-; CIGFX89: ; %bb.0:
-; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
-; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
-; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
-; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
-; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
-; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
-; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
-; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
-; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
-; CIGFX89-NEXT: s_mov_b32 s6, -1
-; CIGFX89-NEXT: s_waitcnt vmcnt(8)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
-; CIGFX89-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: void_func_v32i32_v2i64_v2f64:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
+; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
+; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
+; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
+; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
+; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
+; CI-NEXT: s_mov_b32 s7, 0xf000
+; CI-NEXT: s_mov_b32 s6, -1
+; CI-NEXT: s_waitcnt vmcnt(7)
+; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: void_func_v32i32_v2i64_v2f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
+; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
+; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
+; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
+; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
+; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt vmcnt(7)
+; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: void_func_v32i32_v2i64_v2f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12
+; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8
+; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4
+; GFX9-NEXT: s_mov_b32 s7, 0xf000
+; GFX9-NEXT: s_mov_b32 s6, -1
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX11: ; %bb.0:
@@ -3552,13 +3627,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
-; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
-; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
-; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
-; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
+; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
+; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
+; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
+; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
+; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
+; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
; CI-NEXT: s_waitcnt vmcnt(7)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
@@ -3570,29 +3645,29 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
-; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
-; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
-; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
-; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
-; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
-; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
-; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
-; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
+; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
+; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
+; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
+; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
+; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
+; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
+; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
+; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
+; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
@@ -3601,13 +3676,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
-; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
-; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
-; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
-; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
+; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
+; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
+; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
+; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
+; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
+; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
; VI-NEXT: s_waitcnt vmcnt(7)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
@@ -3619,29 +3694,29 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
-; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
-; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
-; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
-; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
-; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
-; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
-; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
-; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
+; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
+; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
+; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
+; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
+; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
+; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
+; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
+; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
+; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
@@ -3650,13 +3725,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
-; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
+; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
+; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
+; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
; GFX9-NEXT: s_waitcnt vmcnt(7)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3668,15 +3743,15 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
-; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
-; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
-; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
-; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
+; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
+; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
+; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
+; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
+; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
+; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
+; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
+; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3684,14 +3759,14 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
@@ -3791,40 +3866,40 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
-; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
-; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
-; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
-; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
-; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
-; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
-; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
+; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
+; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
+; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
+; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
+; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
+; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
+; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
-; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
-; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
-; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
-; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
-; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
-; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
-; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
-; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
+; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
+; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
+; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
+; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
+; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
+; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
+; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
@@ -3864,40 +3939,40 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
-; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
-; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
-; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
-; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
-; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
-; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
-; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
+; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
+; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
+; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
+; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
+; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
+; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
+; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
-; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
-; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
-; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
-; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
-; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
-; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
-; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
-; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
+; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
+; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
+; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
+; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
+; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
+; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
+; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
+; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
+; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
@@ -3938,27 +4013,27 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
-; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
-; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
-; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
-; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
-; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
-; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
+; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112
+; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108
+; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104
+; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100
+; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128
+; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124
+; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
-; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
-; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
-; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
-; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
-; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
-; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
-; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
+; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116
+; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80
+; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76
+; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72
+; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96
+; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92
+; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88
+; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84
+; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3966,14 +4041,14 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
@@ -4259,9 +4334,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
-; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64
-; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:48
+; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
+; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
+; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
@@ -4275,16 +4350,16 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:28
-; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:32
-; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:20
-; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:16
-; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12
-; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:8
-; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:4
+; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:32
+; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28
+; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:24
+; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:20
+; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
+; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
+; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8
+; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
@@ -4292,15 +4367,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0
+; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0
+; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0
+; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
@@ -4308,14 +4383,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
-; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0
@@ -4324,6 +4391,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v16i8:
@@ -4332,9 +4407,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60
-; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64
-; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48
+; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48
+; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60
+; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
@@ -4348,16 +4423,16 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28
-; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32
-; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20
-; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16
-; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12
-; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8
-; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4
+; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32
+; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28
+; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24
+; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20
+; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16
+; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12
+; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8
+; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
@@ -4365,15 +4440,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0
+; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0
+; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0
+; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
@@ -4381,14 +4456,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
-; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0
@@ -4397,6 +4464,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v16i8:
@@ -4405,9 +4480,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60
-; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64
-; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48
+; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60
+; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
@@ -4421,18 +4496,17 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28
-; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32
-; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20
-; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24
-; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
-; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16
-; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4
+; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32
+; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28
+; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24
+; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20
+; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16
+; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12
+; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8
+; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4
+; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -4440,15 +4514,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0
+; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0
+; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0
+; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -4456,14 +4530,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0
@@ -4472,6 +4538,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: void_func_v32i32_v16i8: