diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll | 90 |
1 files changed, 38 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll index e4def28667ed..9afc0c62e846 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll @@ -611,12 +611,11 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX8-NEXT: s_mov_b32 m0, s3 -; GFX8-NEXT: v_readlane_b32 s8, v0, s3 -; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX8-NEXT: v_readlane_b32 s6, v0, s3 ; GFX8-NEXT: v_writelane_b32 v1, s2, m0 -; GFX8-NEXT: s_add_i32 s2, s2, s8 +; GFX8-NEXT: s_add_i32 s2, s2, s6 +; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX8-NEXT: s_cmp_lg_u64 s[0:1], 0 ; GFX8-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX8-NEXT: ; %bb.2: ; %ComputeEnd ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -652,12 +651,11 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX9-NEXT: s_mov_b32 m0, s3 -; GFX9-NEXT: v_readlane_b32 s8, v0, s3 -; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX9-NEXT: v_readlane_b32 s6, v0, s3 ; GFX9-NEXT: v_writelane_b32 v1, s2, m0 -; GFX9-NEXT: s_add_i32 s2, s2, s8 +; GFX9-NEXT: s_add_i32 s2, s2, s6 +; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX9-NEXT: ; %bb.2: ; %ComputeEnd ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -692,11 +690,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX10W64-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10W64-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX10W64-NEXT: v_readlane_b32 s8, v0, s3 -; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX10W64-NEXT: v_writelane_b32 v1, s2, s3 -; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] +; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX10W64-NEXT: s_add_i32 s2, s2, s8 -; GFX10W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] ; GFX10W64-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX10W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX10W64-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -732,11 +729,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX10W32-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10W32-NEXT: s_ff1_i32_b32 s2, s1 ; GFX10W32-NEXT: v_readlane_b32 s3, v0, s2 -; GFX10W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX10W32-NEXT: v_writelane_b32 v1, s0, s2 -; GFX10W32-NEXT: s_andn2_b32 s1, s1, s6 +; GFX10W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX10W32-NEXT: s_add_i32 s0, s0, s3 -; GFX10W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10W32-NEXT: s_andn2_b32 s1, s1, s2 ; GFX10W32-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX10W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX10W32-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -773,11 +769,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX11W64-NEXT: s_ctz_i32_b64 s3, s[0:1] ; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11W64-NEXT: v_readlane_b32 s8, v1, s3 -; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3 -; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] +; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX11W64-NEXT: s_add_i32 s2, s2, s8 -; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] ; GFX11W64-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX11W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX11W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -817,11 +812,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX11W32-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11W32-NEXT: v_readlane_b32 s3, v1, s2 -; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2 -; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6 +; GFX11W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX11W32-NEXT: s_add_i32 s0, s0, s3 -; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s2 ; GFX11W32-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX11W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX11W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -858,11 +852,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX12W64-NEXT: s_ctz_i32_b64 s3, s[0:1] ; GFX12W64-NEXT: s_wait_alu 0xfffe ; GFX12W64-NEXT: v_readlane_b32 s8, v1, s3 -; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3 -; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] +; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8 -; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] ; GFX12W64-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX12W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX12W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -900,15 +893,15 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX12W32-NEXT: ; implicit-def: $vgpr0 ; GFX12W32-NEXT: .LBB2_1: ; %ComputeLoop ; GFX12W32-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX12W32-NEXT: s_wait_alu 0xfffe ; GFX12W32-NEXT: s_ctz_i32_b32 s2, s1 ; GFX12W32-NEXT: s_wait_alu 0xfffe ; GFX12W32-NEXT: v_readlane_b32 s3, v1, s2 -; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2 -; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6 +; GFX12W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3 ; GFX12W32-NEXT: s_wait_alu 0xfffe -; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s2 ; GFX12W32-NEXT: s_cbranch_scc1 .LBB2_1 ; GFX12W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX12W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1665,12 +1658,11 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX8-NEXT: s_mov_b32 m0, s3 -; GFX8-NEXT: v_readlane_b32 s8, v0, s3 -; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX8-NEXT: v_readlane_b32 s6, v0, s3 ; GFX8-NEXT: v_writelane_b32 v1, s2, m0 -; GFX8-NEXT: s_add_i32 s2, s2, s8 +; GFX8-NEXT: s_add_i32 s2, s2, s6 +; GFX8-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX8-NEXT: s_cmp_lg_u64 s[0:1], 0 ; GFX8-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX8-NEXT: ; %bb.2: ; %ComputeEnd ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -1706,12 +1698,11 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX9-NEXT: s_mov_b32 m0, s3 -; GFX9-NEXT: v_readlane_b32 s8, v0, s3 -; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX9-NEXT: v_readlane_b32 s6, v0, s3 ; GFX9-NEXT: v_writelane_b32 v1, s2, m0 -; GFX9-NEXT: s_add_i32 s2, s2, s8 +; GFX9-NEXT: s_add_i32 s2, s2, s6 +; GFX9-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX9-NEXT: ; %bb.2: ; %ComputeEnd ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -1746,11 +1737,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX10W64-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10W64-NEXT: s_ff1_i32_b64 s3, s[0:1] ; GFX10W64-NEXT: v_readlane_b32 s8, v0, s3 -; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX10W64-NEXT: v_writelane_b32 v1, s2, s3 -; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] +; GFX10W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX10W64-NEXT: s_add_i32 s2, s2, s8 -; GFX10W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX10W64-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] ; GFX10W64-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX10W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX10W64-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -1786,11 +1776,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX10W32-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10W32-NEXT: s_ff1_i32_b32 s2, s1 ; GFX10W32-NEXT: v_readlane_b32 s3, v0, s2 -; GFX10W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX10W32-NEXT: v_writelane_b32 v1, s0, s2 -; GFX10W32-NEXT: s_andn2_b32 s1, s1, s6 +; GFX10W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX10W32-NEXT: s_add_i32 s0, s0, s3 -; GFX10W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10W32-NEXT: s_andn2_b32 s1, s1, s2 ; GFX10W32-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX10W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX10W32-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 @@ -1827,11 +1816,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX11W64-NEXT: s_ctz_i32_b64 s3, s[0:1] ; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11W64-NEXT: v_readlane_b32 s8, v1, s3 -; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3 -; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] +; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX11W64-NEXT: s_add_i32 s2, s2, s8 -; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] ; GFX11W64-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX11W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX11W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1871,11 +1859,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX11W32-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11W32-NEXT: v_readlane_b32 s3, v1, s2 -; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2 -; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6 +; GFX11W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX11W32-NEXT: s_add_i32 s0, s0, s3 -; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s2 ; GFX11W32-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX11W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX11W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1913,11 +1900,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX12W64-NEXT: s_ctz_i32_b64 s3, s[0:1] ; GFX12W64-NEXT: s_wait_alu 0xfffe ; GFX12W64-NEXT: v_readlane_b32 s8, v1, s3 -; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3 -; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] +; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3 ; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8 -; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7] ; GFX12W64-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX12W64-NEXT: ; %bb.2: ; %ComputeEnd ; GFX12W64-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 @@ -1955,15 +1941,15 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr ; GFX12W32-NEXT: ; implicit-def: $vgpr0 ; GFX12W32-NEXT: .LBB6_1: ; %ComputeLoop ; GFX12W32-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX12W32-NEXT: s_wait_alu 0xfffe ; GFX12W32-NEXT: s_ctz_i32_b32 s2, s1 ; GFX12W32-NEXT: s_wait_alu 0xfffe ; GFX12W32-NEXT: v_readlane_b32 s3, v1, s2 -; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2 ; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2 -; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6 +; GFX12W32-NEXT: s_lshl_b32 s2, 1, s2 ; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3 ; GFX12W32-NEXT: s_wait_alu 0xfffe -; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s2 ; GFX12W32-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX12W32-NEXT: ; %bb.2: ; %ComputeEnd ; GFX12W32-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 |
