diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (ref: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll | 479 |
1 files changed, 479 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll new file mode 100644 index 000000000000..f97bced6364d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll @@ -0,0 +1,479 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, 
metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; 
GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> 
@test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], 
v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> 
@test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: 
s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + 
+define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; 
GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + 
ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"singlethread" } +!1 = !{ !"singlethread-one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} |
