summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll 845
1 files changed, 817 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index b3f6533d4388..d288bfc6a09d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -1066,7 +1066,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
@@ -1146,7 +1147,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
@@ -1231,7 +1233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
@@ -1316,7 +1319,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
@@ -1391,6 +1395,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
@@ -1462,6 +1468,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
@@ -1538,6 +1546,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
@@ -1614,12 +1624,809 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
}
+
+define amdgpu_kernel void @cluster_acquire_fence() {
+; GFX6-LABEL: cluster_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_acquire_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_acquire_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster") acquire
+ ret void
+}
+
+define amdgpu_kernel void @cluster_release_fence() {
+; GFX6-LABEL: cluster_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_release_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_release_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster") release
+ ret void
+}
+
+define amdgpu_kernel void @cluster_acq_rel_fence() {
+; GFX6-LABEL: cluster_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_acq_rel_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_acq_rel_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster") acq_rel
+ ret void
+}
+
+define amdgpu_kernel void @cluster_seq_cst_fence() {
+; GFX6-LABEL: cluster_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_seq_cst_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_seq_cst_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster") seq_cst
+ ret void
+}
+
+define amdgpu_kernel void @cluster_one_as_acquire_fence() {
+; GFX6-LABEL: cluster_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_acquire_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_one_as_acquire_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_loadcnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster-one-as") acquire
+ ret void
+}
+
+define amdgpu_kernel void @cluster_one_as_release_fence() {
+; GFX6-LABEL: cluster_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_release_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_one_as_release_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster-one-as") release
+ ret void
+}
+
+define amdgpu_kernel void @cluster_one_as_acq_rel_fence() {
+; GFX6-LABEL: cluster_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_acq_rel_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_one_as_acq_rel_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster-one-as") acq_rel
+ ret void
+}
+
+define amdgpu_kernel void @cluster_one_as_seq_cst_fence() {
+; GFX6-LABEL: cluster_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: cluster_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: cluster_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: cluster_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: cluster_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: cluster_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_seq_cst_fence:
+; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX942-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX942-TGSPLIT-LABEL: cluster_one_as_seq_cst_fence:
+; GFX942-TGSPLIT: ; %bb.0: ; %entry
+; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
+; GFX942-TGSPLIT-NEXT: buffer_inv sc1
+; GFX942-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: cluster_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: cluster_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: cluster_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
+; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
+; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
+; GFX12-WGP-NEXT: s_wait_storecnt 0x0
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: cluster_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: cluster_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
+entry:
+ fence syncscope("cluster-one-as") seq_cst
+ ret void
+}
+
define amdgpu_kernel void @agent_acquire_fence() {
; GFX6-LABEL: agent_acquire_fence:
; GFX6: ; %bb.0: ; %entry
@@ -1799,8 +2606,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -1905,8 +2711,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2012,8 +2817,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2202,8 +3006,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2308,8 +3111,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2415,8 +3217,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2612,8 +3413,6 @@ define amdgpu_kernel void @system_release_fence() {
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2725,8 +3524,6 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2839,8 +3636,6 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3036,8 +3831,6 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -3149,8 +3942,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3263,8 +4054,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS