summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/ds_write2.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ds_write2.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/ds_write2.ll150
1 files changed, 75 insertions, 75 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index 06908d21e535..9f191fa69f65 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -9,7 +9,7 @@
define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_one_val_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -23,7 +23,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr ad
;
; GFX9-LABEL: simple_write2_one_val_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
@@ -44,7 +44,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr ad
define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -60,7 +60,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr ad
;
; GFX9-LABEL: simple_write2_two_val_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc
@@ -85,7 +85,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr ad
define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; CI-LABEL: simple_write2_two_val_f32_volatile_0:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -105,7 +105,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1)
;
; GFX9-LABEL: simple_write2_two_val_f32_volatile_0:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc
@@ -131,7 +131,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1)
define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; CI-LABEL: simple_write2_two_val_f32_volatile_1:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -151,7 +151,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1)
;
; GFX9-LABEL: simple_write2_two_val_f32_volatile_1:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc
@@ -182,7 +182,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1)
define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_subreg2_mixed_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
@@ -199,7 +199,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace
;
; GFX9-LABEL: simple_write2_two_val_subreg2_mixed_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: ; kill: killed $vgpr4
@@ -229,7 +229,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace
define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_subreg2_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
@@ -244,7 +244,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C
;
; GFX9-LABEL: simple_write2_two_val_subreg2_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -268,7 +268,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C
define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_subreg4_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v0
@@ -283,7 +283,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C
;
; GFX9-LABEL: simple_write2_two_val_subreg4_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -307,7 +307,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C
define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_max_offset_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -323,7 +323,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1)
;
; GFX9-LABEL: simple_write2_two_val_max_offset_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc
@@ -348,7 +348,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1)
define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; CI-LABEL: simple_write2_two_val_too_far_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -368,7 +368,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C
;
; GFX9-LABEL: simple_write2_two_val_too_far_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
@@ -394,7 +394,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C
define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; CI-LABEL: simple_write2_two_val_f32_x2:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr
;
; GFX9-LABEL: simple_write2_two_val_f32_x2:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
@@ -450,7 +450,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr
define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; CI-LABEL: simple_write2_two_val_f32_x2_nonzero_base:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -469,7 +469,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspa
;
; GFX9-LABEL: simple_write2_two_val_f32_x2_nonzero_base:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
@@ -506,21 +506,21 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspa
define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1, <2 x ptr addrspace(3)> %lds.ptr) #0 {
; CI-LABEL: write2_ptr_subreg_arg_two_val_f32:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2
-; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x6
-; CI-NEXT: s_mov_b32 s3, 0xf000
-; CI-NEXT: s_mov_b32 s2, 0
+; CI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2
+; CI-NEXT: s_load_dwordx2 s[12:13], s[2:3], 0x6
+; CI-NEXT: s_mov_b32 s11, 0xf000
+; CI-NEXT: s_mov_b32 s10, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_mov_b64 s[0:1], s[4:5]
+; CI-NEXT: s_mov_b64 s[8:9], s[4:5]
; CI-NEXT: v_mov_b32_e32 v1, 0
-; CI-NEXT: s_mov_b64 s[4:5], s[6:7]
-; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
-; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
-; CI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_mov_b32_e32 v1, s8
+; CI-NEXT: s_mov_b64 s[0:1], s[6:7]
+; CI-NEXT: s_mov_b64 s[2:3], s[10:11]
+; CI-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64
+; CI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; CI-NEXT: v_mov_b32_e32 v1, s12
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: v_mov_b32_e32 v3, s9
+; CI-NEXT: v_mov_b32_e32 v3, s13
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: ds_write_b32 v1, v2 offset:32
; CI-NEXT: s_waitcnt vmcnt(0)
@@ -529,14 +529,14 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C
;
; GFX9-LABEL: write2_ptr_subreg_arg_two_val_f32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
-; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x18
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x18
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[4:5]
; GFX9-NEXT: global_load_dword v2, v0, s[6:7]
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: ds_write_b32 v0, v1 offset:32
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -566,7 +566,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C
define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_one_val_f64:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
@@ -580,7 +580,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr ad
;
; GFX9-LABEL: simple_write2_one_val_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
@@ -601,15 +601,15 @@ define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr ad
define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
; CI-LABEL: misaligned_simple_write2_one_val_f64:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
-; CI-NEXT: s_load_dword s0, s[0:1], 0x4
-; CI-NEXT: s_mov_b32 s7, 0xf000
-; CI-NEXT: s_mov_b32 s6, 0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
+; CI-NEXT: s_load_dword s4, s[2:3], 0x4
+; CI-NEXT: s_mov_b32 s3, 0xf000
+; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[0:3], 0 addr64
+; CI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
@@ -618,11 +618,11 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1)
;
; GFX9-LABEL: misaligned_simple_write2_one_val_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; GFX9-NEXT: s_load_dword s4, s[0:1], 0x10
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
+; GFX9-NEXT: s_load_dword s4, s[2:3], 0x10
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX9-NEXT: v_add_u32_e32 v2, s4, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v2, v0, v1 offset1:1
@@ -642,15 +642,15 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1)
define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
; CI-LABEL: unaligned_offset_simple_write2_one_val_f64:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
-; CI-NEXT: s_load_dword s0, s[0:1], 0x4
-; CI-NEXT: s_mov_b32 s7, 0xf000
-; CI-NEXT: s_mov_b32 s6, 0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
+; CI-NEXT: s_load_dword s4, s[2:3], 0x4
+; CI-NEXT: s_mov_b32 s3, 0xf000
+; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[0:3], 0 addr64
+; CI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v3, 24, v1
@@ -675,11 +675,11 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrsp
;
; GFX9-ALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64:
; GFX9-ALIGNED: ; %bb.0:
-; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10
+; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
+; GFX9-ALIGNED-NEXT: s_load_dword s4, s[2:3], 0x10
; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-ALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
+; GFX9-ALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX9-ALIGNED-NEXT: v_add_u32_e32 v2, s4, v2
; GFX9-ALIGNED-NEXT: s_waitcnt vmcnt(0)
; GFX9-ALIGNED-NEXT: ds_write_b8_d16_hi v2, v0 offset:7
@@ -702,11 +702,11 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrsp
;
; GFX9-UNALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64:
; GFX9-UNALIGNED: ; %bb.0:
-; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10
+; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
+; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[2:3], 0x10
; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-UNALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
+; GFX9-UNALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX9-UNALIGNED-NEXT: v_add_u32_e32 v2, s4, v2
; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
; GFX9-UNALIGNED-NEXT: ds_write_b64 v2, v[0:1] offset:5
@@ -726,7 +726,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrsp
define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_two_val_f64:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
@@ -742,7 +742,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr ad
;
; GFX9-LABEL: simple_write2_two_val_f64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] glc
@@ -868,11 +868,11 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb, ptr addrspace(1) %in) #0 {
; CI-LABEL: write2_sgemm_sequence:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x4
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_load_dword s0, s[0:1], 0x0
-; CI-NEXT: s_lshl_b32 s1, s2, 2
+; CI-NEXT: s_lshl_b32 s1, s6, 2
; CI-NEXT: s_add_i32 s2, s1, 0xc20
; CI-NEXT: s_addk_i32 s1, 0xc60
; CI-NEXT: v_mov_b32_e32 v0, s2
@@ -890,8 +890,8 @@ define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda,
;
; GFX9-LABEL: write2_sgemm_sequence:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x10
-; GFX9-NEXT: s_lshl_b32 s2, s2, 2
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x10
+; GFX9-NEXT: s_lshl_b32 s2, s6, 2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX9-NEXT: s_add_i32 s1, s2, 0xc20
@@ -945,12 +945,12 @@ define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda,
define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3) %out, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_v4f32_superreg_align4:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
-; CI-NEXT: s_load_dword s4, s[0:1], 0x0
+; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x2
+; CI-NEXT: s_load_dword s4, s[2:3], 0x0
; CI-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
+; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; CI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v1, s0
@@ -963,11 +963,11 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3)
;
; GFX9-ALIGNED-LABEL: simple_write2_v4f32_superreg_align4:
; GFX9-ALIGNED: ; %bb.0:
-; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0
+; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
+; GFX9-ALIGNED-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-ALIGNED-NEXT: v_lshl_add_u32 v0, v0, 4, s4
-; GFX9-ALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
+; GFX9-ALIGNED-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, s0
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, s1
@@ -979,11 +979,11 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3)
;
; GFX9-UNALIGNED-LABEL: simple_write2_v4f32_superreg_align4:
; GFX9-UNALIGNED: ; %bb.0:
-; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0
+; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x8
+; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-UNALIGNED-NEXT: v_lshl_add_u32 v0, v0, 4, s4
-; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
+; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, s2
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, s3