diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll | 845 |
1 files changed, 817 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index b3f6533d4388..d288bfc6a09d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -1066,7 +1066,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; ; GFX1250-LABEL: workgroup_acquire_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup") acquire @@ -1146,7 +1147,8 @@ define amdgpu_kernel void @workgroup_release_fence() { ; ; GFX1250-LABEL: workgroup_release_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup") release @@ -1231,7 +1233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; ; GFX1250-LABEL: workgroup_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup") acq_rel @@ -1316,7 +1319,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; ; GFX1250-LABEL: workgroup_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup") seq_cst @@ -1391,6 +1395,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() { ; ; GFX1250-LABEL: workgroup_one_as_acquire_fence: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") acquire @@ -1462,6 +1468,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; ; GFX1250-LABEL: workgroup_one_as_release_fence: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") release @@ -1538,6 +1546,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; ; GFX1250-LABEL: workgroup_one_as_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") acq_rel @@ -1614,12 +1624,809 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; ; GFX1250-LABEL: workgroup_one_as_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm entry: fence syncscope("workgroup-one-as") seq_cst ret void } + +define amdgpu_kernel void @cluster_acquire_fence() { +; GFX6-LABEL: cluster_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_acquire_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster") acquire + ret void +} + +define amdgpu_kernel void @cluster_release_fence() { +; GFX6-LABEL: cluster_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_release_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster") release + ret void +} + +define amdgpu_kernel void @cluster_acq_rel_fence() { +; GFX6-LABEL: cluster_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_acq_rel_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster") acq_rel + ret void +} + +define amdgpu_kernel void @cluster_seq_cst_fence() { +; GFX6-LABEL: cluster_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_seq_cst_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster") seq_cst + ret void +} + +define amdgpu_kernel void @cluster_one_as_acquire_fence() { +; GFX6-LABEL: cluster_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_one_as_acquire_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_one_as_acquire_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_one_as_acquire_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster-one-as") acquire + ret void +} + +define amdgpu_kernel void @cluster_one_as_release_fence() { +; GFX6-LABEL: cluster_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_one_as_release_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_one_as_release_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_one_as_release_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster-one-as") release + ret void +} + +define amdgpu_kernel void @cluster_one_as_acq_rel_fence() { +; GFX6-LABEL: cluster_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_one_as_acq_rel_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_one_as_acq_rel_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_one_as_acq_rel_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster-one-as") acq_rel + ret void +} + +define amdgpu_kernel void @cluster_one_as_seq_cst_fence() { +; GFX6-LABEL: cluster_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: cluster_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: cluster_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: cluster_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: cluster_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: cluster_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: cluster_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX942-NOTTGSPLIT-LABEL: cluster_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: cluster_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: cluster_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: cluster_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +; +; GFX12-WGP-LABEL: cluster_one_as_seq_cst_fence: +; GFX12-WGP: ; %bb.0: ; %entry +; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 +; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 +; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 +; GFX12-WGP-NEXT: s_wait_storecnt 0x0 +; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV +; GFX12-WGP-NEXT: s_endpgm +; +; GFX12-CU-LABEL: cluster_one_as_seq_cst_fence: +; GFX12-CU: ; %bb.0: ; %entry +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV +; GFX12-CU-NEXT: s_endpgm +; +; GFX1250-LABEL: cluster_one_as_seq_cst_fence: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +entry: + fence syncscope("cluster-one-as") seq_cst + ret void +} + define amdgpu_kernel void @agent_acquire_fence() { ; GFX6-LABEL: agent_acquire_fence: ; GFX6: ; %bb.0: ; %entry @@ -1799,8 +2606,7 @@ define amdgpu_kernel void @agent_release_fence() { ; ; GFX1250-LABEL: agent_release_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm @@ -1905,8 +2711,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; ; GFX1250-LABEL: agent_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -2012,8 +2817,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; ; GFX1250-LABEL: agent_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -2202,8 +3006,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; ; GFX1250-LABEL: agent_one_as_release_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm @@ -2308,8 +3111,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; ; GFX1250-LABEL: agent_one_as_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -2415,8 +3217,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; ; GFX1250-LABEL: agent_one_as_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_DEV ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_DEV @@ -2612,8 +3413,6 @@ define amdgpu_kernel void @system_release_fence() { ; GFX1250-LABEL: system_release_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_endpgm @@ -2725,8 +3524,6 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX1250-LABEL: system_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -2839,8 +3636,6 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX1250-LABEL: system_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -3036,8 +3831,6 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; GFX1250-LABEL: system_one_as_release_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: s_endpgm @@ -3149,8 +3942,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; GFX1250-LABEL: system_one_as_acq_rel_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS @@ -3263,8 +4054,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; GFX1250-LABEL: system_one_as_seq_cst_fence: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: global_wb scope:SCOPE_SYS -; GFX1250-NEXT: s_wait_bvhcnt 0x0 -; GFX1250-NEXT: s_wait_samplecnt 0x0 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: global_inv scope:SCOPE_SYS |
