summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll348
1 files changed, 130 insertions, 218 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 9d70a2437e55..265c8c409f2d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -197,7 +197,7 @@ define amdgpu_kernel void @flat_system_unordered_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -390,7 +390,7 @@ define amdgpu_kernel void @flat_system_monotonic_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -601,7 +601,7 @@ define amdgpu_kernel void @flat_system_acquire_load(
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -834,17 +834,13 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -1004,7 +1000,8 @@ define amdgpu_kernel void @flat_system_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
@@ -1163,6 +1160,7 @@ define amdgpu_kernel void @flat_system_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1351,9 +1349,8 @@ define amdgpu_kernel void @flat_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1543,9 +1540,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1706,6 +1702,7 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1898,6 +1895,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2088,9 +2086,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -2313,9 +2310,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2540,9 +2536,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2766,10 +2761,11 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -3023,16 +3019,13 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -3286,16 +3279,13 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -3547,6 +3537,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3832,6 +3823,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -4115,9 +4107,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -4433,9 +4424,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4753,9 +4743,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5044,6 +5033,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5331,6 +5321,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5647,9 +5638,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5967,9 +5957,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6287,9 +6276,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6607,9 +6595,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6927,9 +6914,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7247,9 +7233,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7567,9 +7552,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7887,9 +7871,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -8189,9 +8172,10 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -8506,10 +8490,11 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -8836,13 +8821,12 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9190,16 +9174,13 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9547,16 +9528,13 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -9875,12 +9853,11 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10195,10 +10172,11 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10546,16 +10524,13 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -10903,16 +10878,13 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11260,16 +11232,13 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11617,16 +11586,13 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -11970,14 +11936,13 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -12325,16 +12290,13 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -12682,16 +12644,13 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -13039,16 +12998,13 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -13243,7 +13199,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13436,7 +13392,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13658,7 +13614,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load(
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -13901,18 +13857,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
@@ -14072,7 +14024,8 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
@@ -14231,6 +14184,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14419,10 +14373,9 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14611,10 +14564,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14774,6 +14726,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -14962,6 +14915,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15152,10 +15106,9 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -15373,10 +15326,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15596,10 +15548,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15832,11 +15783,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -16100,17 +16052,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -16374,17 +16323,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -16636,6 +16582,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16917,6 +16864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17200,10 +17148,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17514,10 +17461,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17830,10 +17776,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18117,6 +18062,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18400,6 +18346,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18712,10 +18659,9 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19028,10 +18974,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19344,10 +19289,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19660,10 +19604,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19976,10 +19919,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20292,10 +20234,9 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20608,10 +20549,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20924,10 +20864,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -21226,9 +21165,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -21553,11 +21493,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -21884,13 +21825,12 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22248,17 +22188,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22616,17 +22553,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -22955,13 +22889,12 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -23286,11 +23219,12 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -23648,17 +23582,14 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -24016,17 +23947,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -24384,17 +24312,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -24752,17 +24677,14 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25116,15 +25038,14 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25482,17 +25403,14 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -25850,17 +25768,14 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -26218,17 +26133,14 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry: