summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll406
1 files changed, 188 insertions, 218 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
index 36adbc001111..4caaad646378 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
@@ -197,7 +197,7 @@ define amdgpu_kernel void @flat_agent_unordered_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -390,7 +390,7 @@ define amdgpu_kernel void @flat_agent_monotonic_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -599,7 +599,7 @@ define amdgpu_kernel void @flat_agent_acquire_load(
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -830,17 +830,13 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -1000,7 +996,8 @@ define amdgpu_kernel void @flat_agent_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
@@ -1159,6 +1156,7 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1342,9 +1340,9 @@ define amdgpu_kernel void @flat_agent_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1529,9 +1527,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1692,6 +1690,7 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1882,6 +1881,7 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2067,9 +2067,9 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -2285,9 +2285,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2505,9 +2505,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2729,10 +2729,11 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -2979,16 +2980,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -3235,16 +3234,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -3496,6 +3493,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3779,6 +3777,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4057,9 +4056,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -4368,9 +4367,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4681,9 +4680,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4970,6 +4969,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5255,6 +5255,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5564,9 +5565,9 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5877,9 +5878,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6190,9 +6191,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6503,9 +6504,9 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6816,9 +6817,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7129,9 +7130,9 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7442,9 +7443,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7755,9 +7756,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -8057,9 +8058,10 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -8372,10 +8374,11 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -8697,13 +8700,13 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9044,16 +9047,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9394,16 +9395,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9720,12 +9719,11 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10038,10 +10036,11 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10382,16 +10381,14 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10732,16 +10729,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11082,16 +11077,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11432,16 +11425,14 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11778,14 +11769,14 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -12126,16 +12117,14 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -12476,16 +12465,14 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -12826,16 +12813,14 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -13030,7 +13015,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13223,7 +13208,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13443,7 +13428,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13684,18 +13669,14 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13855,7 +13836,8 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
@@ -14014,6 +13996,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14197,10 +14180,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14384,10 +14367,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14547,6 +14530,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -14733,6 +14717,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14918,10 +14903,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -15132,10 +15117,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15348,10 +15333,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15582,11 +15567,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -15843,17 +15829,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -16110,17 +16094,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -16372,6 +16354,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16651,6 +16634,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16929,10 +16913,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17236,10 +17220,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17545,10 +17529,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17830,6 +17814,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18111,6 +18096,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18416,10 +18402,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18725,10 +18711,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19034,10 +19020,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19343,10 +19329,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19652,10 +19638,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19961,10 +19947,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20270,10 +20256,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20579,10 +20565,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20881,9 +20867,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -21206,11 +21193,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -21532,13 +21520,13 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -21889,17 +21877,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22250,17 +22236,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22587,13 +22571,12 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22916,11 +22899,12 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -23271,17 +23255,15 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -23632,17 +23614,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -23993,17 +23973,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -24354,17 +24332,15 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -24711,15 +24687,15 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25070,17 +25046,15 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25431,17 +25405,15 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25792,17 +25764,15 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry: