diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll | 429 |
1 files changed, 357 insertions, 72 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 6bb104311a4d..ab8d8c192187 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-UNALIGNED %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX1250,GFX1250-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX1250,GFX1250-NOUNALIGNED %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s @@ -64,6 +66,52 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) { ; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7 ; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; +; GFX1250-UNALIGNED-LABEL: v_load_constant_v3i32_align1: +; GFX1250-UNALIGNED: ; %bb.0: +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-UNALIGNED-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: +; GFX1250-NOUNALIGNED: ; %bb.0: +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_clause 0xb +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v2, v[0:1], off +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v3, v[0:1], off offset:1 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v4, v[0:1], off offset:2 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v5, v[0:1], off offset:3 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v6, v[0:1], off offset:4 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v7, v[0:1], off offset:5 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v8, v[0:1], off offset:6 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v9, v[0:1], off offset:7 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11 +; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0xa +; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x8 +; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v3, 16, v4 :: v_dual_lshlrev_b32 v2, 24, v5 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x6 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4 +; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v6, 16, v8 :: v_dual_lshlrev_b32 v5, 24, v9 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x1 +; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0 +; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1 +; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4 +; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7 +; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31] +; ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -256,6 +304,34 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) { ; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6 ; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; +; GFX1250-UNALIGNED-LABEL: v_load_constant_v3i32_align2: +; GFX1250-UNALIGNED: ; %bb.0: +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-UNALIGNED-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: +; GFX1250-NOUNALIGNED: ; %bb.0: +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_clause 0x5 +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v2, v[0:1], off +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v3, v[0:1], off offset:2 +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v4, v[0:1], off offset:4 +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v5, v[0:1], off offset:6 +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v6, v[0:1], off offset:8 +; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v7, v[0:1], off offset:10 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4 +; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4 +; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6 +; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31] +; ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -346,16 +422,35 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) { } define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_v3i32_align4: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align4: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align4: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_v3i32_align4: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_v3i32_align4: ; GFX9: ; %bb.0: @@ -392,16 +487,35 @@ define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) { } define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_i96_align8: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_i96_align8: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_i96_align8: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_i96_align8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_i96_align8: ; GFX9: ; %bb.0: @@ -438,16 +552,35 @@ define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) { } define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_v3i32_align8: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align8: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align8: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_v3i32_align8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_v3i32_align8: ; GFX9: ; %bb.0: @@ -484,16 +617,35 @@ define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) { } define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_v6i16_align8: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_v6i16_align8: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v6i16_align8: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_v6i16_align8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_v6i16_align8: ; GFX9: ; %bb.0: @@ -539,28 +691,67 @@ define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) { } define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_v12i8_align8: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v13, 8, v0 -; GFX12-NEXT: v_lshrrev_b32_e32 v12, 16, v0 -; GFX12-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX12-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX12-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX12-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX12-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX12-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13 -; GFX12-NEXT: v_mov_b32_e32 v8, v2 -; GFX12-NEXT: v_mov_b32_e32 v2, v12 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_v12i8_align8: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v0 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v12, 16, v0 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v2 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v2 +; GFX12-UNALIGNED-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13 +; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v8, v2 +; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v2, v12 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v12i8_align8: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v0 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v12, 16, v0 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v2 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v2 +; GFX12-NOUNALIGNED-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13 +; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, v2 +; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, v12 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_v12i8_align8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_dual_lshrrev_b32 v13, 8, v0 :: v_dual_lshrrev_b32 v12, 16, v0 +; GFX1250-NEXT: v_dual_lshrrev_b32 v3, 24, v0 :: v_dual_lshrrev_b32 v5, 8, v1 +; GFX1250-NEXT: v_dual_lshrrev_b32 v6, 16, v1 :: v_dual_lshrrev_b32 v7, 24, v1 +; GFX1250-NEXT: v_dual_lshrrev_b32 v9, 8, v2 :: v_dual_lshrrev_b32 v10, 16, v2 +; GFX1250-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_lshrrev_b32 v11, 24, v2 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v8, v2 :: v_dual_mov_b32 v1, v13 +; GFX1250-NEXT: v_mov_b32_e32 v2, v12 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_v12i8_align8: ; GFX9: ; %bb.0: @@ -632,16 +823,35 @@ define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) { } define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) { -; GFX12-LABEL: v_load_constant_v3i32_align16: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX12-NEXT: s_wait_expcnt 0x0 -; GFX12-NEXT: s_wait_samplecnt 0x0 -; GFX12-NEXT: s_wait_bvhcnt 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off -; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: s_setpc_b64 s[30:31] +; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align16: +; GFX12-UNALIGNED: ; %bb.0: +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align16: +; GFX12-NOUNALIGNED: ; %bb.0: +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_load_constant_v3i32_align16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; ; GFX9-LABEL: v_load_constant_v3i32_align16: ; GFX9: ; %bb.0: @@ -720,6 +930,53 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg ; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s5 ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog ; +; GFX1250-UNALIGNED-LABEL: s_load_constant_v3i32_align1: +; GFX1250-UNALIGNED: ; %bb.0: +; GFX1250-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1250-UNALIGNED-NEXT: ; return to shader part epilog +; +; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: +; GFX1250-NOUNALIGNED: ; %bb.0: +; GFX1250-NOUNALIGNED-NEXT: s_clause 0xa +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s2, s[0:1], 0x1 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s3, s[0:1], 0x3 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s4, s[0:1], 0x2 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s5, s[0:1], 0x5 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s6, s[0:1], 0x7 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s7, s[0:1], 0x6 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s8, s[0:1], 0x9 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s9, s[0:1], 0xb +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s10, s[0:1], 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s11, s[0:1], 0x4 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s12, s[0:1], 0xa +; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s1, s[0:1], 0x8 +; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 8 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s3, 24 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s3, s4, 16 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s4, s5, 8 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s3 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s5, s6, 24 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s6, s7, 16 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s7, s8, 8 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s10 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s8, s9, 24 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s2, s0 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s12, 16 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s11 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s4, s5, s6 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s5, s7, s1 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s8, s2 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s4, s3 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s5 +; GFX1250-NOUNALIGNED-NEXT: ; return to shader part epilog +; ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 @@ -916,6 +1173,34 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg ; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s7 ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog ; +; GFX1250-UNALIGNED-LABEL: s_load_constant_v3i32_align2: +; GFX1250-UNALIGNED: ; %bb.0: +; GFX1250-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] +; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1250-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1250-UNALIGNED-NEXT: ; return to shader part epilog +; +; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: +; GFX1250-NOUNALIGNED: ; %bb.0: +; GFX1250-NOUNALIGNED-NEXT: s_clause 0x5 +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s2, s[0:1], 0x2 +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s3, s[0:1], 0x6 +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s4, s[0:1], 0xa +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s5, s[0:1], 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s6, s[0:1], 0x4 +; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s7, s[0:1], 0x8 +; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 16 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s1, s3, 16 +; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s4, 16 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s5 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s1, s6 +; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s7 +; GFX1250-NOUNALIGNED-NEXT: ; return to shader part epilog +; ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 |
