diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll | 348 |
1 files changed, 130 insertions, 218 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 9d70a2437e55..265c8c409f2d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -197,7 +197,7 @@ define amdgpu_kernel void @flat_system_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -390,7 +390,7 @@ define amdgpu_kernel void @flat_system_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -601,7 +601,7 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -834,17 +834,13 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -1004,7 +1000,8 @@ define amdgpu_kernel void @flat_system_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1163,6 +1160,7 @@ define amdgpu_kernel void @flat_system_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -1351,9 +1349,8 @@ define amdgpu_kernel void @flat_system_release_store( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm @@ -1543,9 +1540,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm @@ -1706,6 +1702,7 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1898,6 +1895,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -2088,9 +2086,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm @@ -2313,9 +2310,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -2540,9 +2536,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -2766,10 +2761,11 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3023,16 +3019,13 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3286,16 +3279,13 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3547,6 +3537,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -3832,6 +3823,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -4115,9 +4107,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm @@ -4433,9 +4424,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -4753,9 +4743,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -5044,6 +5033,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -5331,6 +5321,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -5647,9 +5638,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -5967,9 +5957,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6287,9 +6276,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6607,9 +6595,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6927,9 +6914,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7247,9 +7233,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7567,9 +7552,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7887,9 +7871,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -8189,9 +8172,10 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8506,10 +8490,11 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8836,13 +8821,12 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9190,16 +9174,13 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9547,16 +9528,13 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9875,12 +9853,11 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10195,10 +10172,11 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10546,16 +10524,13 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10903,16 +10878,13 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11260,16 +11232,13 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11617,16 +11586,13 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11970,14 +11936,13 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12325,16 +12290,13 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12682,16 +12644,13 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13039,16 +12998,13 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13243,7 +13199,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13436,7 +13392,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13658,7 +13614,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13901,18 +13857,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -14072,7 +14024,8 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -14231,6 +14184,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14419,10 +14373,9 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14611,10 +14564,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14774,6 +14726,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -14962,6 +14915,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -15152,10 +15106,9 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -15373,10 +15326,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -15596,10 +15548,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -15832,11 +15783,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -16100,17 +16052,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -16374,17 +16323,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -16636,6 +16582,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16917,6 +16864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -17200,10 +17148,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -17514,10 +17461,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -17830,10 +17776,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -18117,6 +18062,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -18400,6 +18346,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -18712,10 +18659,9 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -19028,10 +18974,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -19344,10 +19289,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -19660,10 +19604,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -19976,10 +19919,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -20292,10 +20234,9 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -20608,10 +20549,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -20924,10 +20864,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -21226,9 +21165,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21553,11 +21493,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21884,13 +21825,12 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22248,17 +22188,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22616,17 +22553,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22955,13 +22889,12 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -23286,11 +23219,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -23648,17 +23582,14 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -24016,17 +23947,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -24384,17 +24312,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -24752,17 +24677,14 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25116,15 +25038,14 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25482,17 +25403,14 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25850,17 +25768,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -26218,17 +26133,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: |
