diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format [users/mingmingl-llvm/samplefdo-profile-format]
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll new file mode 100644 index 000000000000..f57103007787 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll @@ -0,0 +1,301 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +declare void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol, i32 %mask) +declare void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol, i32 %mask) +declare void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol, i32 %mask) +declare void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol, i32 %mask) + +define amdgpu_ps void @cluster_load_async_to_lds_b8_vaddr(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %mask) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b8_vaddr: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b8_vaddr: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; 
GFX1250-GISEL-NEXT: s_mov_b32 m0, s0 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 1, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b8_vaddr_imm_mask(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b8_vaddr_imm_mask: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: s_mov_b32 m0, 15 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b8_vaddr_imm_mask: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_mov_b32 m0, 15 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b8 v2, v[0:1], off offset:16 +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 15) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b8_saddr(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask) { +; GFX1250-LABEL: cluster_load_async_to_lds_b8_saddr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, 32 +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b8 v0, v1, s[0:1] offset:16 +; GFX1250-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, 
i32 16, i32 0, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b32_vaddr(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %mask) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b32_vaddr: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b32_vaddr: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_mov_b32 m0, s0 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 10, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b32_vaddr_imm_mask(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b32_vaddr_imm_mask: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: s_mov_b32 m0, 15 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b32_vaddr_imm_mask: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; 
GFX1250-GISEL-NEXT: s_mov_b32 m0, 15 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b32 v2, v[0:1], off offset:16 +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 15) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b32_saddr( ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask) { +; GFX1250-LABEL: cluster_load_async_to_lds_b32_saddr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, 32 +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 +; GFX1250-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b64_vaddr(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %mask) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_vaddr: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b64_vaddr: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_mov_b32 m0, s0 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = 
getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 22, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b64_vaddr_imm_mask( ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_vaddr_imm_mask: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: s_movk_i32 m0, 0x7f +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b64_vaddr_imm_mask: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_movk_i32 m0, 0x7f +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b64 v2, v[0:1], off offset:16 +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 127) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b64_saddr(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask) { +; GFX1250-LABEL: cluster_load_async_to_lds_b64_saddr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, 32 +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 +; GFX1250-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b128_vaddr(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 
%mask) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b128_vaddr: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b128_vaddr: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s0, v3 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_mov_b32 m0, s0 +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 27, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b128_vaddr_imm_mask(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b128_vaddr_imm_mask: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 32, v[0:1] +; GFX1250-SDAG-NEXT: s_movk_i32 m0, 0x7f +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b128_vaddr_imm_mask: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: s_movk_i32 m0, 0x7f +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b128 v2, v[0:1], off offset:16 +; GFX1250-GISEL-NEXT: 
s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 127) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b128_saddr(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask) { +; GFX1250-LABEL: cluster_load_async_to_lds_b128_saddr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v1, 32 +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b128 v0, v1, s[0:1] offset:16 +; GFX1250-NEXT: s_endpgm +entry: + %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 + call void @llvm.amdgcn.cluster.load.async.to.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b32_saddr_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask, i32 %idx) { +; GFX1250-LABEL: cluster_load_async_to_lds_b32_saddr_scale_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 scale_offset th:TH_LOAD_NT +; GFX1250-NEXT: s_endpgm +entry: + %idxprom = sext i32 %idx to i64 + %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom + call void @llvm.amdgcn.cluster.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 1, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b64_saddr_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask, i32 %idx) { +; GFX1250-LABEL: cluster_load_async_to_lds_b64_saddr_scale_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_mov_b32 m0, s2 +; GFX1250-NEXT: cluster_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 scale_offset th:TH_LOAD_NT +; GFX1250-NEXT: s_endpgm +entry: + %idxprom = sext i32 %idx to i64 + %gep = getelementptr i64, ptr addrspace(1) 
%gaddr, i64 %idxprom + call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 1, i32 %mask) + ret void +} + +define amdgpu_ps void @cluster_load_async_to_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask, i32 %idx) { +; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-SDAG-NEXT: s_mov_b32 m0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1] +; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] +; GFX1250-GISEL-NEXT: s_mov_b32 m0, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo +; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT +; GFX1250-GISEL-NEXT: s_endpgm +entry: + %idxprom = sext i32 %idx to i64 + %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom + call void @llvm.amdgcn.cluster.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 1, i32 %mask) + ret void +} |
