diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/rotr.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/rotr.ll | 44 |
1 files changed, 22 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/rotr.ll b/llvm/test/CodeGen/AMDGPU/rotr.ll index 058ee589bc4b..0e1dd69d930a 100644 --- a/llvm/test/CodeGen/AMDGPU/rotr.ll +++ b/llvm/test/CodeGen/AMDGPU/rotr.ll @@ -19,7 +19,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) { ; ; SI-LABEL: rotr_i32: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -32,7 +32,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) { ; ; GFX8-LABEL: rotr_i32: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s3 ; GFX8-NEXT: v_alignbit_b32 v2, s2, s2, v0 @@ -43,7 +43,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) { ; ; GFX10-LABEL: rotr_i32: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_alignbit_b32 v1, s2, s2, s3 @@ -52,7 +52,7 @@ define amdgpu_kernel void @rotr_i32(ptr addrspace(1) %in, i32 %x, i32 %y) { ; ; GFX11-LABEL: rotr_i32: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_alignbit_b32 v1, s2, s2, s3 @@ -84,8 +84,8 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i ; ; SI-LABEL: rotr_v2i32: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -98,8 +98,8 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i ; ; GFX8-LABEL: rotr_v2i32: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s7 ; GFX8-NEXT: v_mov_b32_e32 v2, s6 @@ -113,20 +113,20 @@ define amdgpu_kernel void @rotr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i ; GFX10-LABEL: rotr_v2i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_alignbit_b32 v1, s5, s5, s7 ; GFX10-NEXT: v_alignbit_b32 v0, s4, s4, s6 -; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: rotr_v2i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x2c -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x2c +; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_alignbit_b32 v1, s5, s5, s7 @@ -161,8 +161,8 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i ; ; SI-LABEL: rotr_v4i32: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0xd +; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -179,8 +179,8 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i ; ; GFX8-LABEL: rotr_v4i32: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x34 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s11 ; GFX8-NEXT: v_mov_b32_e32 v1, s10 @@ -198,22 +198,22 @@ define amdgpu_kernel void @rotr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i ; GFX10-LABEL: rotr_v4i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x34 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_alignbit_b32 v3, s7, s7, s11 ; GFX10-NEXT: v_alignbit_b32 v2, s6, s6, s10 ; GFX10-NEXT: v_alignbit_b32 v1, s5, s5, s9 ; GFX10-NEXT: v_alignbit_b32 v0, s4, s4, s8 -; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: rotr_v4i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x34 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: s_load_b256 s[4:11], s[2:3], 0x34 +; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_alignbit_b32 v3, s7, s7, s11 |
