diff options
| author | Steven Wu <stevenwu@apple.com> | 2025-07-31 10:49:36 -0700 |
|---|---|---|
| committer | Steven Wu <stevenwu@apple.com> | 2025-07-31 10:49:36 -0700 |
| commit | ab17987c5d1235e2c0f5cf25c6c6005f6c8af6c8 (patch) | |
| tree | 0ba393cfba0617bd7ee1b7d1482598e337f17ba8 | |
| parent | 9032d0f21c888f90353b85b0136aabfcc7edf2d0 (diff) | |
| parent | 7f93487862d98bf1c168babba87daf6224d8a46f (diff) | |
[𝘀𝗽𝗿] changes introduced through rebase (users/cachemeifyoucan/spr/main.clangscandeps-clear-compilation-directory-if-needed)
Created using spr 1.3.6
[skip ci]
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 880 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s | 45 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s | 45 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s | 56 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s | 56 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s | 16 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s | 16 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt | 45 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt | 42 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt | 12 | ||||
| -rw-r--r-- | llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt | 2 |
13 files changed, 1216 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 2d3caec72dea..96fe503c369a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -2066,6 +2066,7 @@ defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>; defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>; defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>; +defm V_CVT_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x36f>; defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">; defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">; defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>; diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index 1d33c2668652..8894b50db42c 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -11,6 +11,10 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s +; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck 
-enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-FAKE16 %s +; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-FAKE16 %s define amdgpu_kernel void @fptrunc_f32_to_f16( ; SI-SDAG-LABEL: fptrunc_f32_to_f16: @@ -192,6 +196,39 @@ define amdgpu_kernel void @fptrunc_f32_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16: +; 
GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -381,6 +418,39 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r, ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; 
GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %a) { entry: %a.val = load float, ptr addrspace(1) %a @@ -1089,6 +1159,130 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, 
v0 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00 +; 
GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s4, 0xfc10 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s6, 1, s4 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX1250-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s7, s4, 12 +; GFX1250-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s9, s8, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s2, s7 +; 
GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s9, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s6, s9, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s6, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s6, s2, 7 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s7, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s6, s7, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_add_co_i32 s2, s2, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s4, 0x40f +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s5, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 16 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -1294,6 +1488,87 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn( ; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; 
+; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; 
GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 +; 
GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -1511,6 +1786,38 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16( ; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f32_to_v2f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f32_to_v2f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 
+; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -2756,6 +3063,225 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; 
GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8 +; GFX1250-SDAG-FAKE16-NEXT: 
v_or_b32_e32 v0, s5, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s5, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3 +; 
GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s3, s3, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s5, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff +; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 +; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s2, 0xfc10 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s8, 1, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000 +; GFX1250-GISEL-FAKE16-NEXT: s_max_i32 s8, s8, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s9, s2, 12 +; GFX1250-GISEL-FAKE16-NEXT: s_min_i32 s8, s8, 13 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s4, s4, 9 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s11, s10, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s9 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s11, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s4, s4, 0x7c00 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s8, s10 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s8, s11, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s2, 1 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s3, 7 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s8, s9, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_add_co_i32 s3, s3, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s2, 30 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s2, 0x40f +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff +; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 +; 
GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s4, 0xfc10 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s6, 1, s4 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000 +; GFX1250-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s4, 12 +; GFX1250-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s10, s9, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s8 +; GFX1250-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s10, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s9 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s6, s10, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s6, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 7 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: 
s_or_b32 s6, s8, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_add_co_i32 s3, s3, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s4, 0x40f +; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s5, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s4, s7, 16 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s4, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -3001,6 +3527,141 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn( ; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; 
GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; 
GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8 +; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe +; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10 +; 
GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s5, 0xfc10 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s3, s3, s10 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s5, 0x40f +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; 
GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -3190,6 +3851,42 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fneg_fptrunc_f32_to_f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fneg_fptrunc_f32_to_f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_xor_b32 s2, s2, 0x80000000 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -3380,6 +4077,42 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fabs_fptrunc_f32_to_f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; 
GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fabs_fptrunc_f32_to_f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_bitset0_b32 s2, 31 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) { entry: @@ -3570,6 +4303,42 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; 
GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, 0x80000000, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_bitset1_b32 s2, 31 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) #0 { entry: @@ -3769,6 +4538,42 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32( ; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; 
GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) #0 { entry: @@ -3968,6 +4773,45 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32( ; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; 
GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_bitset0_b32 s2, 31 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) #0 { entry: @@ -4176,6 +5020,42 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32( ; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: 
fptrunc_f32_to_f16_sext_i32: +; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32: +; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-FAKE16-NEXT: s_cvt_f16_f32 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_sext_i32_i16 s2, s2 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX1250-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], null +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a) #0 { entry: diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index 81fc477b19ac..7d4f28f28b4c 100644 --- 
a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -467,6 +467,51 @@ v_cvt_pk_fp8_f16 v1, 0x3118 v_cvt_pk_fp8_f16 v1, 0.15915494 // GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] +v_cvt_pk_f16_f32 v5, v1, v2 +// GFX1250: v_cvt_pk_f16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_f16_f32 v5, v255, v255 +// GFX1250: v_cvt_pk_f16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6f,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_f16_f32 v5, s1, s2 +// GFX1250: v_cvt_pk_f16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_f16_f32 v5, s105, s105 +// GFX1250: v_cvt_pk_f16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6f,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_f16_f32 v5, vcc_lo, ttmp15 +// GFX1250: v_cvt_pk_f16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6f,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_f16_f32 v5, vcc_hi, 0xaf123456 +// GFX1250: v_cvt_pk_f16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_f16_f32 v5, ttmp15, src_scc +// GFX1250: v_cvt_pk_f16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6f,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_f16_f32 v5, m0, 0.5 +// GFX1250: v_cvt_pk_f16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6f,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_f16_f32 v5, exec_lo, -1 +// GFX1250: v_cvt_pk_f16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6f,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_f16_f32 v5, exec_hi, null +// GFX1250: v_cvt_pk_f16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6f,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_f16_f32 v5, null, exec_lo +// GFX1250: v_cvt_pk_f16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6f,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_f16_f32 v5, -1, exec_hi +// GFX1250: v_cvt_pk_f16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6f,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_f16_f32 v5, 0.5, m0 mul:2 +// GFX1250: 
v_cvt_pk_f16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6f,0xd7,0xf0,0xfa,0x00,0x08] + +v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 +// GFX1250: v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x6f,0xd7,0xfd,0xd4,0x00,0x10] + +v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 +// GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] + v_cvt_sr_bf8_f16 v1, v2, v3 // GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 209951d455a6..f910d85e3b9b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -467,6 +467,51 @@ v_cvt_pk_fp8_f16 v1.l, 0x3118 v_cvt_pk_fp8_f16 v1.l, 0.15915494 // GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] +v_cvt_pk_f16_f32 v5, v1, v2 +// GFX1250: v_cvt_pk_f16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_f16_f32 v5, v255, v255 +// GFX1250: v_cvt_pk_f16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6f,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_f16_f32 v5, s1, s2 +// GFX1250: v_cvt_pk_f16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_f16_f32 v5, s105, s105 +// GFX1250: v_cvt_pk_f16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6f,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_f16_f32 v5, vcc_lo, ttmp15 +// GFX1250: v_cvt_pk_f16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6f,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_f16_f32 v5, vcc_hi, 0xaf123456 +// GFX1250: v_cvt_pk_f16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_f16_f32 v5, ttmp15, src_scc +// GFX1250: v_cvt_pk_f16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6f,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_f16_f32 v5, m0, 
0.5 +// GFX1250: v_cvt_pk_f16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6f,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_f16_f32 v5, exec_lo, -1 +// GFX1250: v_cvt_pk_f16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6f,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_f16_f32 v5, exec_hi, null +// GFX1250: v_cvt_pk_f16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6f,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_f16_f32 v5, null, exec_lo +// GFX1250: v_cvt_pk_f16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6f,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_f16_f32 v5, -1, exec_hi +// GFX1250: v_cvt_pk_f16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6f,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_f16_f32 v5, 0.5, m0 mul:2 +// GFX1250: v_cvt_pk_f16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6f,0xd7,0xf0,0xfa,0x00,0x08] + +v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 +// GFX1250: v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x6f,0xd7,0xfd,0xd4,0x00,0x10] + +v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 +// GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] + v_cvt_sr_bf8_f16 v1, v2.l, v3 // GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s index c406890e3325..4ffc9057acff 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s @@ -346,6 +346,62 @@ v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask: // GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX1250: 
v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: 
v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1 // GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s index 741d1a1c510b..40894691581b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s @@ -346,6 +346,62 @@ v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 // GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_sr_bf8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1 // GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s index 8a3e7ada47bc..990cdbf34ff5 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s +++ 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s @@ -242,6 +242,22 @@ v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6f,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_sr_bf8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 // GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s index 
f79b5c5b8b7b..7c9fd82f4608 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s @@ -242,6 +242,22 @@ v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6f,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_sr_bf8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 // GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git 
a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index 10ffc2caa18b..49782d4cba4f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -486,6 +486,51 @@ # GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] # GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] +0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] + +0x05,0x00,0x6f,0xd7,0xc1,0xfe,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6f,0xd7,0xc1,0xfe,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0xf0,0xfa,0x00,0x08 +# GFX1250: v_cvt_pk_f16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6f,0xd7,0xf0,0xfa,0x00,0x08] + +0x05,0x00,0x6f,0xd7,0x7f,0xf8,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6f,0xd7,0x7f,0xf8,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0x7e,0x82,0x01,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6f,0xd7,0x7e,0x82,0x01,0x00] + +0x05,0x00,0x6f,0xd7,0x7d,0xe0,0x01,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6f,0xd7,0x7d,0xe0,0x01,0x00] + +0x05,0x00,0x6f,0xd7,0x7c,0xfc,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6f,0xd7,0x7c,0xfc,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0x01,0x04,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x04,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0x69,0xd2,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6f,0xd7,0x69,0xd2,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0xfd,0xd4,0x00,0x10 +# GFX1250: v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: 
[0x05,0x00,0x6f,0xd7,0xfd,0xd4,0x00,0x10] + +0x05,0x00,0x6f,0xd7,0x7b,0xfa,0x01,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6f,0xd7,0x7b,0xfa,0x01,0x00] + +0x05,0x00,0x6f,0xd7,0x01,0x05,0x02,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6f,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x00,0x6f,0xd7,0xff,0xff,0x03,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6f,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x00,0x6f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX1250: v_cvt_pk_f16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +0x05,0x00,0x6f,0xd7,0x6a,0xf6,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6f,0xd7,0x6a,0xf6,0x00,0x00] + 0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20 # GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] # GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt index e138425aee92..7e8700acab11 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt @@ -286,6 +286,48 @@ # GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] # GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] + 
+0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff # GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] # GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt index c6bde2241fa4..70dbcb8c9743 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt @@ -204,6 +204,18 @@ # GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: 
[0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +0xff,0x81,0x6f,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6f,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0x6f,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + 0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f # GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] # GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] diff --git a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt index deabf110f2e4..d9da627ad52e 100644 --- a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt +++ b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt @@ -2,7 +2,7 @@ # libraries, but expects them to exist in the process loading the plugin. This # doesn't work with DLLs on Windows (where a shared library can't have undefined # references), so just skip this testcase on Windows. 
-if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN) +if ((NOT WIN32 AND NOT CYGWIN) OR LLVM_BUILD_LLVM_DYLIB) unset(LLVM_LINK_COMPONENTS) add_llvm_library(InlineAdvisorPlugin MODULE BUILDTREE_ONLY InlineAdvisorPlugin.cpp diff --git a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt index 0b37cebe3da6..941e18efc1a5 100644 --- a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt +++ b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt @@ -2,7 +2,7 @@ # libraries, but expects them to exist in the process loading the plugin. This # doesn't work with DLLs on Windows (where a shared library can't have undefined # references), so just skip this testcase on Windows. -if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN) +if ((NOT WIN32 AND NOT CYGWIN) OR LLVM_BUILD_LLVM_DYLIB) unset(LLVM_LINK_COMPONENTS) add_llvm_library(InlineOrderPlugin MODULE BUILDTREE_ONLY InlineOrderPlugin.cpp |
