summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/rotr.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/rotr.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/rotr.ll44
1 files changed, 22 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/rotr.ll b/llvm/test/CodeGen/AMDGPU/rotr.ll
index 058ee589bc4b..0e1dd69d930a 100644
--- a/llvm/test/CodeGen/AMDGPU/rotr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rotr.ll
@@ -19,7 +19,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) {
;
; SI-LABEL: rotr_i32:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
@@ -32,7 +32,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) {
;
; GFX8-LABEL: rotr_i32:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_alignbit_b32 v2, s2, s2, v0
@@ -43,7 +43,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) {
;
; GFX10-LABEL: rotr_i32:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_alignbit_b32 v1, s2, s2, s3
@@ -52,7 +52,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) {
;
; GFX11-LABEL: rotr_i32:
; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v1, s2, s2, s3
@@ -84,8 +84,8 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
;
; SI-LABEL: rotr_v2i32:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
@@ -98,8 +98,8 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
;
; GFX8-LABEL: rotr_v2i32:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s7
; GFX8-NEXT: v_mov_b32_e32 v2, s6
@@ -113,20 +113,20 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
; GFX10-LABEL: rotr_v2i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_alignbit_b32 v1, s5, s5, s7
; GFX10-NEXT: v_alignbit_b32 v0, s4, s4, s6
-; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: rotr_v2i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x2c
-; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x2c
+; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v1, s5, s5, s7
@@ -161,8 +161,8 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i
;
; SI-LABEL: rotr_v4i32:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0xd
+; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
@@ -179,8 +179,8 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i
;
; GFX8-LABEL: rotr_v4i32:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x34
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s11
; GFX8-NEXT: v_mov_b32_e32 v1, s10
@@ -198,22 +198,22 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i
; GFX10-LABEL: rotr_v4i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
-; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x34
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_alignbit_b32 v3, s7, s7, s11
; GFX10-NEXT: v_alignbit_b32 v2, s6, s6, s10
; GFX10-NEXT: v_alignbit_b32 v1, s5, s5, s9
; GFX10-NEXT: v_alignbit_b32 v0, s4, s4, s8
-; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: rotr_v4i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x34
-; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_load_b256 s[4:11], s[2:3], 0x34
+; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_alignbit_b32 v3, s7, s7, s11