diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll | 406 |
1 files changed, 188 insertions, 218 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index 36adbc001111..4caaad646378 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -197,7 +197,7 @@ define amdgpu_kernel void @flat_agent_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -390,7 +390,7 @@ define amdgpu_kernel void @flat_agent_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -599,7 +599,7 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -830,17 +830,13 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -1000,7 +996,8 @@ define amdgpu_kernel void @flat_agent_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1159,6 +1156,7 @@ define amdgpu_kernel void @flat_agent_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -1342,9 +1340,9 @@ define amdgpu_kernel void @flat_agent_release_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm @@ -1529,9 +1527,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm @@ -1692,6 +1690,7 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1882,6 +1881,7 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -2067,9 +2067,9 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm @@ -2285,9 +2285,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -2505,9 +2505,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -2729,10 +2729,11 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2979,16 +2980,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3235,16 +3234,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3496,6 +3493,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -3779,6 +3777,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -4057,9 +4056,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm @@ -4368,9 +4367,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -4681,9 +4680,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -4970,6 +4969,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -5255,6 +5255,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -5564,9 +5565,9 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -5877,9 +5878,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6190,9 +6191,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6503,9 +6504,9 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -6816,9 +6817,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7129,9 +7130,9 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7442,9 +7443,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -7755,9 +7756,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0 @@ -8057,9 +8058,10 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8372,10 +8374,11 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8697,13 +8700,13 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9044,16 +9047,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9394,16 +9395,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9720,12 +9719,11 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10038,10 +10036,11 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10382,16 +10381,14 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10732,16 +10729,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11082,16 +11077,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11432,16 +11425,14 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11778,14 +11769,14 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12126,16 +12117,14 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12476,16 +12465,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12826,16 +12813,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13030,7 +13015,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13223,7 +13208,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13443,7 +13428,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13684,18 +13669,14 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -13855,7 +13836,8 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -14014,6 +13996,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14197,10 +14180,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14384,10 +14367,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { @@ -14547,6 +14530,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -14733,6 +14717,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -14918,10 +14903,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -15132,10 +15117,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -15348,10 +15333,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -15582,11 +15567,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -15843,17 +15829,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -16110,17 +16094,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -16372,6 +16354,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16651,6 +16634,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -16929,10 +16913,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -17236,10 +17220,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -17545,10 +17529,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -17830,6 +17814,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -18111,6 +18096,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -18416,10 +18402,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -18725,10 +18711,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -19034,10 +19020,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -19343,10 +19329,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -19652,10 +19638,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -19961,10 +19947,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -20270,10 +20256,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -20579,10 +20565,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -20881,9 +20867,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21206,11 +21193,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21532,13 +21520,13 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21889,17 +21877,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22250,17 +22236,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22587,13 +22571,12 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -22916,11 +22899,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -23271,17 +23255,15 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -23632,17 +23614,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -23993,17 +23973,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -24354,17 +24332,15 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -24711,15 +24687,15 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25070,17 +25046,15 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25431,17 +25405,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -25792,17 +25764,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: |
