summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll90
1 files changed, 38 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
index 39a3c9aade58..10fd34f08b83 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
@@ -628,12 +628,11 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX8-NEXT: s_mov_b32 m0, s3
-; GFX8-NEXT: v_readlane_b32 s8, v0, s3
-; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3
+; GFX8-NEXT: v_readlane_b32 s6, v0, s3
; GFX8-NEXT: v_writelane_b32 v1, s2, m0
-; GFX8-NEXT: s_add_i32 s2, s2, s8
+; GFX8-NEXT: s_add_i32 s2, s2, s6
+; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
-; GFX8-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX8-NEXT: s_cbranch_scc1 .LBB2_1
; GFX8-NEXT: ; %bb.2: ; %ComputeEnd
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -670,12 +669,11 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX9-NEXT: s_mov_b32 m0, s3
-; GFX9-NEXT: v_readlane_b32 s8, v0, s3
-; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3
+; GFX9-NEXT: v_readlane_b32 s6, v0, s3
; GFX9-NEXT: v_writelane_b32 v1, s2, m0
-; GFX9-NEXT: s_add_i32 s2, s2, s8
+; GFX9-NEXT: s_add_i32 s2, s2, s6
+; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
-; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX9-NEXT: s_cbranch_scc1 .LBB2_1
; GFX9-NEXT: ; %bb.2: ; %ComputeEnd
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -711,11 +709,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX10W64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10W64-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX10W64-NEXT: v_readlane_b32 s8, v0, s3
-; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX10W64-NEXT: v_writelane_b32 v1, s2, s3
-; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
+; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX10W64-NEXT: s_add_i32 s2, s2, s8
-; GFX10W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
; GFX10W64-NEXT: s_cbranch_scc1 .LBB2_1
; GFX10W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX10W64-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -752,11 +749,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX10W32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10W32-NEXT: s_ff1_i32_b32 s2, s1
; GFX10W32-NEXT: v_readlane_b32 s3, v0, s2
-; GFX10W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX10W32-NEXT: v_writelane_b32 v1, s0, s2
-; GFX10W32-NEXT: s_andn2_b32 s1, s1, s6
+; GFX10W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX10W32-NEXT: s_add_i32 s0, s0, s3
-; GFX10W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX10W32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10W32-NEXT: s_cbranch_scc1 .LBB2_1
; GFX10W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX10W32-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -794,11 +790,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX11W64-NEXT: s_ctz_i32_b64 s3, s[0:1]
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11W64-NEXT: v_readlane_b32 s8, v1, s3
-; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3
-; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
+; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX11W64-NEXT: s_add_i32 s2, s2, s8
-; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
; GFX11W64-NEXT: s_cbranch_scc1 .LBB2_1
; GFX11W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX11W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -839,11 +834,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX11W32-NEXT: s_ctz_i32_b32 s2, s1
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11W32-NEXT: v_readlane_b32 s3, v1, s2
-; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2
-; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6
+; GFX11W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX11W32-NEXT: s_add_i32 s0, s0, s3
-; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s2
; GFX11W32-NEXT: s_cbranch_scc1 .LBB2_1
; GFX11W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX11W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -880,11 +874,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX12W64-NEXT: s_ctz_i32_b64 s3, s[0:1]
; GFX12W64-NEXT: s_wait_alu 0xfffe
; GFX12W64-NEXT: v_readlane_b32 s8, v1, s3
-; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3
-; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
+; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8
-; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
; GFX12W64-NEXT: s_cbranch_scc1 .LBB2_1
; GFX12W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX12W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -923,15 +916,15 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX12W32-NEXT: ; implicit-def: $vgpr0
; GFX12W32-NEXT: .LBB2_1: ; %ComputeLoop
; GFX12W32-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12W32-NEXT: s_wait_alu 0xfffe
; GFX12W32-NEXT: s_ctz_i32_b32 s2, s1
; GFX12W32-NEXT: s_wait_alu 0xfffe
; GFX12W32-NEXT: v_readlane_b32 s3, v1, s2
-; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2
-; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6
+; GFX12W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3
; GFX12W32-NEXT: s_wait_alu 0xfffe
-; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s2
; GFX12W32-NEXT: s_cbranch_scc1 .LBB2_1
; GFX12W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX12W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -1833,12 +1826,11 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX8-NEXT: s_mov_b32 m0, s3
-; GFX8-NEXT: v_readlane_b32 s8, v0, s3
-; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3
+; GFX8-NEXT: v_readlane_b32 s6, v0, s3
; GFX8-NEXT: v_writelane_b32 v1, s2, m0
-; GFX8-NEXT: s_add_i32 s2, s2, s8
+; GFX8-NEXT: s_add_i32 s2, s2, s6
+; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
-; GFX8-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX8-NEXT: s_cbranch_scc1 .LBB7_1
; GFX8-NEXT: ; %bb.2: ; %ComputeEnd
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -1875,12 +1867,11 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX9-NEXT: s_mov_b32 m0, s3
-; GFX9-NEXT: v_readlane_b32 s8, v0, s3
-; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3
+; GFX9-NEXT: v_readlane_b32 s6, v0, s3
; GFX9-NEXT: v_writelane_b32 v1, s2, m0
-; GFX9-NEXT: s_add_i32 s2, s2, s8
+; GFX9-NEXT: s_add_i32 s2, s2, s6
+; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
-; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX9-NEXT: s_cbranch_scc1 .LBB7_1
; GFX9-NEXT: ; %bb.2: ; %ComputeEnd
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -1916,11 +1907,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX10W64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10W64-NEXT: s_ff1_i32_b64 s3, s[0:1]
; GFX10W64-NEXT: v_readlane_b32 s8, v0, s3
-; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX10W64-NEXT: v_writelane_b32 v1, s2, s3
-; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
+; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX10W64-NEXT: s_add_i32 s2, s2, s8
-; GFX10W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7]
; GFX10W64-NEXT: s_cbranch_scc1 .LBB7_1
; GFX10W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX10W64-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -1957,11 +1947,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX10W32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10W32-NEXT: s_ff1_i32_b32 s2, s1
; GFX10W32-NEXT: v_readlane_b32 s3, v0, s2
-; GFX10W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX10W32-NEXT: v_writelane_b32 v1, s0, s2
-; GFX10W32-NEXT: s_andn2_b32 s1, s1, s6
+; GFX10W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX10W32-NEXT: s_add_i32 s0, s0, s3
-; GFX10W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX10W32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10W32-NEXT: s_cbranch_scc1 .LBB7_1
; GFX10W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX10W32-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -1999,11 +1988,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX11W64-NEXT: s_ctz_i32_b64 s3, s[0:1]
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11W64-NEXT: v_readlane_b32 s8, v1, s3
-; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3
-; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
+; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX11W64-NEXT: s_add_i32 s2, s2, s8
-; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
; GFX11W64-NEXT: s_cbranch_scc1 .LBB7_1
; GFX11W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX11W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2044,11 +2032,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX11W32-NEXT: s_ctz_i32_b32 s2, s1
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11W32-NEXT: v_readlane_b32 s3, v1, s2
-; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2
-; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6
+; GFX11W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX11W32-NEXT: s_add_i32 s0, s0, s3
-; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s2
; GFX11W32-NEXT: s_cbranch_scc1 .LBB7_1
; GFX11W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX11W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2086,11 +2073,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX12W64-NEXT: s_ctz_i32_b64 s3, s[0:1]
; GFX12W64-NEXT: s_wait_alu 0xfffe
; GFX12W64-NEXT: v_readlane_b32 s8, v1, s3
-; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3
-; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
+; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8
-; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
; GFX12W64-NEXT: s_cbranch_scc1 .LBB7_1
; GFX12W64-NEXT: ; %bb.2: ; %ComputeEnd
; GFX12W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2129,15 +2115,15 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
; GFX12W32-NEXT: ; implicit-def: $vgpr0
; GFX12W32-NEXT: .LBB7_1: ; %ComputeLoop
; GFX12W32-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12W32-NEXT: s_wait_alu 0xfffe
; GFX12W32-NEXT: s_ctz_i32_b32 s2, s1
; GFX12W32-NEXT: s_wait_alu 0xfffe
; GFX12W32-NEXT: v_readlane_b32 s3, v1, s2
-; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2
; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2
-; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6
+; GFX12W32-NEXT: s_lshl_b32 s2, 1, s2
; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3
; GFX12W32-NEXT: s_wait_alu 0xfffe
-; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s2
; GFX12W32-NEXT: s_cbranch_scc1 .LBB7_1
; GFX12W32-NEXT: ; %bb.2: ; %ComputeEnd
; GFX12W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0