diff options
| author | Amir Ayupov <aaupov@fb.com> | 2025-06-08 22:50:59 -0700 |
|---|---|---|
| committer | Amir Ayupov <aaupov@fb.com> | 2025-06-08 22:50:59 -0700 |
| commit | eb1dd48b4ab79e29e82bd396c01b4a7cc83bbd81 (patch) | |
| tree | ad446efb8062977bcf328af58cdc7fcd47864af2 /llvm/test/CodeGen/AMDGPU/function-args.ll | |
| parent | 81f92265327a8cf6e730ad1d01fd02f4ef76ed86 (diff) | |
| parent | 03bbd04bb7ae40dcda7b8bf1d6d09f63191503b0 (diff) | |
[𝘀𝗽𝗿] changes introduced through rebaseusers/aaupov/spr/main.boltnfci-simplify-dataaggregator-using-traces-1
Created using spr 1.3.4
[skip ci]
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/function-args.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/function-args.ll | 498 |
1 files changed, 286 insertions, 212 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index 81b8b3618074..a901d7f97eb3 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -3380,42 +3380,117 @@ define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i } define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { -; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64: -; CIGFX89: ; %bb.0: -; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 -; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 -; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 -; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 -; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 -; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 -; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 -; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 -; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 -; CIGFX89-NEXT: s_mov_b32 s6, -1 -; CIGFX89-NEXT: s_waitcnt vmcnt(8) -; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 -; CIGFX89-NEXT: s_waitcnt vmcnt(0) -; CIGFX89-NEXT: s_setpc_b64 s[30:31] +; CI-LABEL: void_func_v32i32_v2i64_v2f64: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 +; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28 +; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 +; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16 +; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12 +; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8 +; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: s_waitcnt vmcnt(7) +; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: void_func_v32i32_v2i64_v2f64: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 +; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28 +; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 +; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16 +; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12 +; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt vmcnt(7) +; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: void_func_v32i32_v2i64_v2f64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28 +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 +; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:16 +; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[35:38], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[31:34], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v2i64_v2f64: ; GFX11: ; %bb.0: @@ -3552,13 +3627,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 -; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 -; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 -; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 -; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 -; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 -; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 +; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 +; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 +; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 +; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 +; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 +; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 ; CI-NEXT: s_waitcnt vmcnt(7) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) @@ -3570,29 +3645,29 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 -; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 -; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 -; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 -; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 -; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 -; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 -; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 -; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 +; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 +; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48 +; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44 +; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40 +; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64 +; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60 +; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56 +; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52 +; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36 ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v8i32_v8f32: @@ -3601,13 +3676,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 -; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 -; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 -; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 -; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 -; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 -; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 +; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 +; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 +; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 +; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 +; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 ; VI-NEXT: s_waitcnt vmcnt(7) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -3619,29 +3694,29 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 -; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 -; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 -; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 -; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 -; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 -; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 -; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 -; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 +; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 +; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48 +; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44 +; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40 +; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64 +; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60 +; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56 +; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52 +; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v8i32_v8f32: @@ -3650,13 +3725,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 -; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 -; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 +; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 +; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 +; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 ; GFX9-NEXT: s_waitcnt vmcnt(7) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3668,15 +3743,15 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 -; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 -; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 -; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 -; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 -; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 -; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 +; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 +; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48 +; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44 +; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40 +; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64 +; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60 +; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56 +; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52 +; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3684,14 +3759,14 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v8i32_v8f32: @@ -3791,40 +3866,40 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 -; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 -; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 -; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 -; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 -; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 -; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 -; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 +; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 +; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 +; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 +; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 +; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128 +; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124 +; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 -; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 -; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 -; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 -; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 -; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 -; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 -; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 -; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 +; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116 +; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 +; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 +; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 +; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96 +; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92 +; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88 +; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84 +; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v16i32_v16f32: @@ -3864,40 +3939,40 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 -; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 -; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 -; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 -; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 -; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 -; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 +; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 +; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 +; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 +; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 +; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128 +; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124 +; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 -; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 -; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 -; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 -; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 -; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 +; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116 +; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 +; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 +; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 +; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96 +; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92 +; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88 +; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v16i32_v16f32: @@ -3938,27 +4013,27 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 -; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 -; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 -; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 -; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 -; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 +; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 +; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 +; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 +; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 +; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:128 +; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:124 +; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:120 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 -; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 -; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 -; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 -; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 +; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:116 +; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 +; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96 +; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92 +; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88 +; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84 +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3966,14 +4041,14 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v16i32_v16f32: @@ -4259,9 +4334,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60 -; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64 -; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:48 +; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 +; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60 +; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:64 ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52 ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 ; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36 @@ -4275,16 +4350,16 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:28 -; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:32 -; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:20 -; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:24 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:16 -; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12 -; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:8 -; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:4 +; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:32 +; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 +; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:24 +; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:20 +; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16 +; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12 +; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8 +; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44 ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) @@ -4292,15 +4367,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0 +; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0 +; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0 +; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) @@ -4308,14 +4383,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0 @@ -4324,6 +4391,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v16i8: @@ -4332,9 +4407,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 -; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64 -; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 +; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48 +; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60 +; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64 ; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 ; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 ; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 @@ -4348,16 +4423,16 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 -; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 -; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 -; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 -; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 -; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 -; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 +; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32 +; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28 +; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24 +; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20 +; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16 +; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12 +; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -4365,15 +4440,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0 +; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0 +; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0 +; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -4381,14 +4456,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0 @@ -4397,6 +4464,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0 +; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v16i8: @@ -4405,9 +4480,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 -; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64 -; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 +; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:48 +; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:60 +; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:64 ; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 ; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 ; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 @@ -4421,18 +4496,17 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 -; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 -; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 -; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 -; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 -; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 -; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:32 +; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:28 +; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:24 +; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:20 +; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:16 +; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4440,15 +4514,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0 +; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0 +; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0 +; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4456,14 +4530,6 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0 @@ -4472,6 +4538,14 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0 +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-TRUE16-LABEL: void_func_v32i32_v16i8: |
