diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll | 185 |
1 files changed, 134 insertions, 51 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll index f086542b3d1f..8734e7152e28 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -197,7 +197,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -390,7 +390,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -583,7 +583,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -776,7 +776,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -936,7 +936,8 @@ define amdgpu_kernel void @flat_wavefront_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1095,7 +1096,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1254,7 +1256,8 @@ define amdgpu_kernel void @flat_wavefront_release_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1413,7 +1416,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1572,6 +1576,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1731,6 +1736,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1890,6 +1896,7 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2049,6 +2056,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2208,6 +2216,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2411,9 +2420,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2617,9 +2627,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2823,9 +2834,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -3077,6 +3089,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -3329,6 +3342,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -3581,6 +3595,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -3833,6 +3848,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4085,6 +4101,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4337,6 +4354,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4589,6 +4607,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4841,6 +4860,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5093,6 +5113,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5345,6 +5366,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5597,6 +5619,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5849,6 +5872,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6101,6 +6125,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6353,6 +6378,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6605,6 +6631,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6901,9 +6928,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7201,9 +7229,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7501,9 +7530,10 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7801,9 +7831,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8101,9 +8132,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8401,9 +8433,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8701,9 +8734,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9001,9 +9035,10 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9301,9 +9336,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9601,9 +9637,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9901,9 +9938,10 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10201,9 +10239,10 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10501,9 +10540,10 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10801,9 +10841,10 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11101,9 +11142,10 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11298,7 +11340,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -11491,7 +11533,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -11684,7 +11726,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -11877,7 +11919,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load( ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -12037,7 +12079,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -12196,7 +12239,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -12355,7 +12399,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -12514,7 +12559,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store( ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -12673,6 +12719,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -12832,6 +12879,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -12991,6 +13039,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -13150,6 +13199,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -13309,6 +13359,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { @@ -13512,9 +13563,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -13718,9 +13770,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -13924,9 +13977,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -14178,6 +14232,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -14430,6 +14485,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -14682,6 +14738,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -14934,6 +14991,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -15186,6 +15244,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -15438,6 +15497,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -15690,6 +15750,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -15942,6 +16003,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16194,6 +16256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16446,6 +16509,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16698,6 +16762,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -16950,6 +17015,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -17202,6 +17268,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -17454,6 +17521,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -17706,6 +17774,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -18002,9 +18071,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -18302,9 +18372,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -18602,9 +18673,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -18902,9 +18974,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -19202,9 +19275,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -19502,9 +19576,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -19802,9 +19877,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -20102,9 +20178,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -20402,9 +20479,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -20702,9 +20780,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21002,9 +21081,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21302,9 +21382,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21602,9 +21683,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -21902,9 +21984,10 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: v_mov_b32_e32 v1, s2 ; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE +; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: |
