diff options
| author | vangthao95 <vang.thao@amd.com> | 2025-10-30 08:19:12 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-30 08:19:12 -0700 |
| commit | ba5cde79aa05eeaa87d45cf472f3583fa9f93bff (patch) | |
| tree | a2d5e3f829d53e4b1332e02e9c5f7f107d547f3f /llvm/test/CodeGen/AMDGPU/GlobalISel | |
| parent | 0030fac839566eb83bdb8a7ed61800ac021b2465 (diff) | |
[AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7 (#165355)
When selecting for G_AMDGPU_COPY_SCC_VCC, we use S_CMP_LG_U64 or
S_CMP_LG_U32 for wave64 and wave32 respectively. However, on gfx7 we do
not have the S_CMP_LG_U64 instruction. Work around this issue by using
S_OR_B64 instead.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll | 66 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir | 37 |
2 files changed, 103 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll new file mode 100644 index 000000000000..1a7ccf083568 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { +; GFX7-LABEL: fcmp_uniform_select: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0 +; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7-NEXT: s_cselect_b32 s4, 1, 0 +; GFX7-NEXT: s_and_b32 s4, s4, 1 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 +; GFX7-NEXT: s_cselect_b32 s3, s7, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: fcmp_uniform_select: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0 +; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX8-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8-NEXT: s_and_b32 s0, s0, 1 +; GFX8-NEXT: s_cmp_lg_u32 s0, 0 +; GFX8-NEXT: s_cselect_b32 s0, s1, s6 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX11-LABEL: fcmp_uniform_select: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s0, s0, 1 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, s1, s6 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_endpgm + %cmp = fcmp oeq float %a, 0.0 + %sel = select i1 %cmp, i32 %b, i32 %c + store i32 %sel, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir new file mode 100644 index 000000000000..67cc0169af61 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s + +--- +name: test_copy_scc_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + ; GFX7-LABEL: name: test_copy_scc_vcc + ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GF8-LABEL: name: test_copy_scc_vcc + ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc + ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GF8-NEXT: $sgpr0 = COPY [[COPY]] + ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX11-LABEL: name: test_copy_scc_vcc + ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 + %0:vcc(s1) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0 + $sgpr0 = COPY %1 + S_ENDPGM 0, implicit $sgpr0 +... |
