; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=VI %s ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=CI %s ; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s ; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s declare hidden void @external_void_func_i1(i1) #0 declare hidden void @external_void_func_i1_signext(i1 signext) #0 declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0 declare hidden void @external_void_func_i8(i8) #0 declare hidden void @external_void_func_i8_signext(i8 signext) #0 declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0 declare hidden void @external_void_func_i16(i16) #0 declare hidden void @external_void_func_i16_signext(i16 signext) #0 declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0 declare hidden void @external_void_func_i32(i32) #0 declare hidden void @external_void_func_i64(i64) #0 declare hidden void @external_void_func_v2i64(<2 x i64>) #0 
declare hidden void @external_void_func_v3i64(<3 x i64>) #0 declare hidden void @external_void_func_v4i64(<4 x i64>) #0 declare hidden void @external_void_func_f16(half) #0 declare hidden void @external_void_func_f32(float) #0 declare hidden void @external_void_func_f64(double) #0 declare hidden void @external_void_func_v2f32(<2 x float>) #0 declare hidden void @external_void_func_v2f64(<2 x double>) #0 declare hidden void @external_void_func_v3f32(<3 x float>) #0 declare hidden void @external_void_func_v3f64(<3 x double>) #0 declare hidden void @external_void_func_v5f32(<5 x float>) #0 declare hidden void @external_void_func_v2i16(<2 x i16>) #0 declare hidden void @external_void_func_v2f16(<2 x half>) #0 declare hidden void @external_void_func_v3i16(<3 x i16>) #0 declare hidden void @external_void_func_v3f16(<3 x half>) #0 declare hidden void @external_void_func_v4i16(<4 x i16>) #0 declare hidden void @external_void_func_v4f16(<4 x half>) #0 declare hidden void @external_void_func_v2i32(<2 x i32>) #0 declare hidden void @external_void_func_v3i32(<3 x i32>) #0 declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0 declare hidden void @external_void_func_v4i32(<4 x i32>) #0 declare hidden void @external_void_func_v5i32(<5 x i32>) #0 declare hidden void @external_void_func_v8i32(<8 x i32>) #0 declare hidden void @external_void_func_v16i32(<16 x i32>) #0 declare hidden void @external_void_func_v32i32(<32 x i32>) #0 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 ; return value and argument declare hidden i32 @external_i32_func_i32(i32) #0 ; Structs declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0 declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0 declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 declare hidden void @external_void_func_v16i8(<16 x i8>) #0 ; 
FIXME: Should be passing -1 (the checks below currently expect the `i1 true` argument to be materialized as 1 in v0) define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; VI-LABEL: test_call_external_void_func_i1_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i1_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i1_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 ; 
SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i1_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i1_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_i1(i1 true) ret void } define amdgpu_kernel void 
@test_call_external_void_func_i1_signext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i1_signext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i1_signext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i1_signext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, 
SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: v_bfe_i32 v0, v0, 0, 1 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_signext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_signext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i1_signext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_signext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_signext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: 
test_call_external_void_func_i1_signext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile i1, ptr addrspace(1) poison call void @external_void_func_i1_signext(i1 signext %var) ret void } ; FIXME: load should be scheduled before getpc (NOTE(review): the checks that follow already place the buffer load ahead of s_getpc_b64 for every prefix, so this FIXME may be stale -- confirm before removing) define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i1_zeroext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_and_b32_e32 v0, 1, v0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: 
test_call_external_void_func_i1_zeroext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_and_b32_e32 v0, 1, v0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i1_zeroext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i1_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_zeroext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_zeroext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i1_zeroext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_zeroext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_zeroext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: v_and_b32_e32 v0, 1, v0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i1_zeroext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile 
i1, ptr addrspace(1) poison call void @external_void_func_i1_zeroext(i1 zeroext %var) ret void } define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i8_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i8_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i8_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, 
external_void_func_i8@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4 ; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12 ; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_imm: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4 ; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12 ; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-FAKE16-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i8_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i8_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: 
s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_i8(i8 123) ret void } ; FIXME: don't wait before call (the checks below still expect s_waitcnt vmcnt(0) between the load and s_swappc_b64) define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i8_signext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i8_signext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; 
CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i8_signext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i8_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_signext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_signext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i8_signext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; 
HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_signext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_signext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i8_signext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile i8, ptr addrspace(1) poison call void @external_void_func_i8_signext(i8 signext %var) ret void } define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i8_zeroext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; 
VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i8_zeroext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i8_zeroext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; 
GFX11-LABEL: test_call_external_void_func_i8_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_zeroext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_zeroext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i8_zeroext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_zeroext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_zeroext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i8_zeroext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 ; 
GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile i8, ptr addrspace(1) poison call void @external_void_func_i8_zeroext(i8 zeroext %var) ret void } define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_i16_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i16_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i16_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 
; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4 ; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12 ; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_imm: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4 ; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12 ; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-FAKE16-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i16_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: 
test_call_external_void_func_i16_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_i16(i16 123) ret void } define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i16_signext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i16_signext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 
; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i16_signext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i16_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_signext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_signext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i16_signext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 
; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_sshort v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_signext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_signext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i16_signext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile i16, ptr addrspace(1) poison call void @external_void_func_i16_signext(i16 signext %var) ret void } define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; VI-LABEL: test_call_external_void_func_i16_zeroext: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: 
s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i16_zeroext: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i16_zeroext: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, 
external_void_func_i16_zeroext@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i16_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_zeroext@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_zeroext@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i16_zeroext: ; HSA: ; %bb.0: ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_zeroext@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_zeroext@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i16_zeroext: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], 
s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %var = load volatile i16, ptr addrspace(1) poison call void @external_void_func_i16_zeroext(i16 zeroext %var) ret void } define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, 
SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 42 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 42 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], 
s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_i32(i32 42) ret void } define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_i64_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_i64_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_i64_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 
s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x7b ; SDAG-NEXT: v_mov_b32_e32 v1, 0 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_i64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_i64_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x7b ; HSA-NEXT: v_mov_b32_e32 v1, 0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_i64_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; 
GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_i64(i64 123) ret void } define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; VI-LABEL: test_call_external_void_func_v2i64: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i64: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, 
external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2i64: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], 0 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2i64: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 
flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2i64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <2 x i64>, ptr addrspace(1) null call void @external_void_func_v2i64(<2 x i64> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i64_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; VI-NEXT: 
s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 ; VI-NEXT: v_mov_b32_e32 v3, 4 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i64_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: v_mov_b32_e32 v3, 4 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2i64_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1 ; SDAG-NEXT: v_mov_b32_e32 v1, 2 ; SDAG-NEXT: v_mov_b32_e32 v2, 3 ; SDAG-NEXT: v_mov_b32_e32 v3, 4 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: 
v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2i64_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1 ; HSA-NEXT: v_mov_b32_e32 v1, 2 ; HSA-NEXT: v_mov_b32_e32 v2, 3 ; HSA-NEXT: v_mov_b32_e32 v3, 4 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2i64_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1 ; GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GISEL-NEXT: v_mov_b32_e32 v2, 3 ; GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3i64() 
#0 { ; VI-LABEL: test_call_external_void_func_v3i64: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i64: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3i64: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; 
SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], 0 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v4, 1 ; SDAG-NEXT: v_mov_b32_e32 v5, 2 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3i64: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i64@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v4, 1 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v5, 2 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; 
; GISEL-LABEL: test_call_external_void_func_v3i64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v4, 1 ; GISEL-NEXT: v_mov_b32_e32 v5, 2 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2> call void @external_void_func_v3i64(<3 x i64> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; VI-LABEL: test_call_external_void_func_v4i64: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v4, 
1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: v_mov_b32_e32 v6, 3 ; VI-NEXT: v_mov_b32_e32 v7, 4 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i64: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: v_mov_b32_e32 v6, 3 ; CI-NEXT: v_mov_b32_e32 v7, 4 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v4i64: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], 0 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v4, 1 ; SDAG-NEXT: v_mov_b32_e32 v5, 2 ; SDAG-NEXT: v_mov_b32_e32 v6, 3 ; SDAG-NEXT: v_mov_b32_e32 v7, 4 ; SDAG-NEXT: 
s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v4i64: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i64@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v4, 1 ; HSA-NEXT: v_mov_b32_e32 v5, 2 ; HSA-NEXT: v_mov_b32_e32 v6, 3 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v7, 4 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v4i64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GISEL-NEXT: s_addc_u32 s37, 
s37, 0 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v4, 1 ; GISEL-NEXT: v_mov_b32_e32 v5, 2 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: v_mov_b32_e32 v6, 3 ; GISEL-NEXT: v_mov_b32_e32 v7, 4 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> call void @external_void_func_v4i64(<4 x i64> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_f16_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_f16_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: 
s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_f16_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x4400 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4 ; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12 ; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: test_call_external_void_func_f16_imm: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4 ; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12 ; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-FAKE16-NEXT: s_endpgm ; ; HSA-LABEL: 
test_call_external_void_func_f16_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x4400 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_f16_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x4400 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_f16(half 4.0) ret void } define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_f32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; 
VI-NEXT: v_mov_b32_e32 v0, 4.0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_f32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_f32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 4.0 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_f32_imm: ; HSA: ; 
%bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 4.0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_f32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 4.0 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_f32(float 4.0) ret void } define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: 
v_mov_b32_e32 v1, 2.0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2f32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2f32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 
s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2f32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1.0 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2f32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, 
external_void_func_v3f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3f32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 ; SDAG-NEXT: v_mov_b32_e32 v2, 4.0 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: 
v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3f32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1.0 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3f32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v2, 4.0 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>) ret void } define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; VI-LABEL: 
test_call_external_void_func_v5f32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 ; VI-NEXT: v_mov_b32_e32 v3, -1.0 ; VI-NEXT: v_mov_b32_e32 v4, 0.5 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v5f32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 ; CI-NEXT: v_mov_b32_e32 v3, -1.0 ; CI-NEXT: v_mov_b32_e32 v4, 0.5 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v5f32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 
s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 ; SDAG-NEXT: v_mov_b32_e32 v2, 4.0 ; SDAG-NEXT: v_mov_b32_e32 v3, -1.0 ; SDAG-NEXT: v_mov_b32_e32 v4, 0.5 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v5f32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5f32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v5f32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5f32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1.0 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0 ; HSA-NEXT: v_mov_b32_e32 v2, 4.0 ; HSA-NEXT: v_mov_b32_e32 v3, -1.0 ; HSA-NEXT: v_mov_b32_e32 v4, 0.5 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v5f32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 
0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v2, 4.0 ; GISEL-NEXT: v_mov_b32_e32 v3, -1.0 ; GISEL-NEXT: v_mov_b32_e32 v4, 0.5 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) ret void } define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_f64_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_f64_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, 
external_void_func_f64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_f64_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x40100000 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_f64_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40100000 ; HSA-NEXT: s_mov_b32 s32, 0 ; 
HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_f64_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_f64(double 4.0) ret void } define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f64_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2f64_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: 
s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2f64_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 ; SDAG-NEXT: v_mov_b32_e32 v2, 0 ; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2f64_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, 
s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2f64_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f64_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 
s4, s4, external_void_func_v3f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 0x40100000 ; VI-NEXT: v_mov_b32_e32 v4, 0 ; VI-NEXT: v_mov_b32_e32 v5, 0x40200000 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f64_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 0x40100000 ; CI-NEXT: v_mov_b32_e32 v4, 0 ; CI-NEXT: v_mov_b32_e32 v5, 0x40200000 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3f64_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: v_mov_b32_e32 v1, 2.0 ; SDAG-NEXT: v_mov_b32_e32 v2, 
0 ; SDAG-NEXT: v_mov_b32_e32 v3, 0x40100000 ; SDAG-NEXT: v_mov_b32_e32 v4, 0 ; SDAG-NEXT: v_mov_b32_e32 v5, 0x40200000 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f64_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f64@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3f64_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f64@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f64@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 2.0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000 ; HSA-NEXT: v_mov_b32_e32 v4, 0 ; HSA-NEXT: v_mov_b32_e32 v5, 0x40200000 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3f64_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; 
GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: v_mov_b32_e32 v1, 2.0 ; GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GISEL-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>) ret void } define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; VI-LABEL: test_call_external_void_func_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i16: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, 
external_void_func_v2i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2i16: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i16@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2i16: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i16@rel32@lo+4 ; 
HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2i16: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <2 x i16>, ptr addrspace(1) poison call void @external_void_func_v2i16(<2 x i16> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; VI-LABEL: test_call_external_void_func_v3i16: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: 
test_call_external_void_func_v3i16: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[3:4], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshr_b64 v[1:2], v[3:4], 16 ; CI-NEXT: v_mov_b32_e32 v0, v3 ; CI-NEXT: v_mov_b32_e32 v2, v4 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3i16: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, 
external_void_func_v3i16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3i16: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3i16: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <3 x i16>, ptr addrspace(1) poison call void @external_void_func_v3i16(<3 x i16> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; VI-LABEL: test_call_external_void_func_v3f16: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, 
SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f16: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3f16: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, 
s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3f16: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3f16: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 
; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <3 x half>, ptr addrspace(1) poison call void @external_void_func_v3f16(<3 x half> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3i16_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 3 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i16_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; 
CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3i16_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001 ; SDAG-NEXT: v_mov_b32_e32 v1, 3 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3i16_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001 ; HSA-NEXT: v_mov_b32_e32 v1, 3 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: 
test_call_external_void_func_v3i16_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001 ; GISEL-NEXT: v_mov_b32_e32 v1, 3 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f16_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; VI-NEXT: v_mov_b32_e32 v1, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f16_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; 
CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3f16_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x4400 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3f16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3f16_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 
s9, s9, external_void_func_v3f16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; HSA-NEXT: v_mov_b32_e32 v1, 0x4400 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3f16_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x4400 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3f16(<3 x half> ) ret void } define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; VI-LABEL: test_call_external_void_func_v4i16: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i16: ; CI: ; %bb.0: 
; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; CI-NEXT: v_mov_b32_e32 v2, v1 ; CI-NEXT: v_mov_b32_e32 v1, v4 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v4i16: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, 
external_void_func_v4i16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v4i16: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v4i16: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <4 x i16>, ptr addrspace(1) poison call void @external_void_func_v4i16(<4 x i16> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i16_imm: ; VI: ; %bb.0: ; VI-NEXT: 
s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 0x40003 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i16_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: v_mov_b32_e32 v3, 4 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v4i16_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; SDAG-NEXT: 
s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 0x20001 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x40003 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i16_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v4i16_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 0x20001 ; HSA-NEXT: v_mov_b32_e32 v1, 0x40003 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v4i16_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 0x20001 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x40003 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: 
s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>) ret void } define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; VI-LABEL: test_call_external_void_func_v2f16: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2f16: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; 
SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2f16: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f16@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f16@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2f16: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, 
SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_load_dword s8, s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <2 x half>, ptr addrspace(1) poison call void @external_void_func_v2f16(<2 x half> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; VI-LABEL: test_call_external_void_func_v2i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; CI-NEXT: 
s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, 
s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <2 x i32>, ptr addrspace(1) poison call void @external_void_func_v2i32(<2 x i32> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 
v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v2i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1 ; SDAG-NEXT: v_mov_b32_e32 v1, 2 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: 
s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v2i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1 ; HSA-NEXT: v_mov_b32_e32 v1, 2 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v2i32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 1 ; GISEL-NEXT: v_mov_b32_e32 v1, 2 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, 
s4, external_void_func_v3i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 3 ; SDAG-NEXT: v_mov_b32_e32 v1, 4 ; SDAG-NEXT: v_mov_b32_e32 v2, 5 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 
; GFX11-NEXT: v_mov_b32_e32 v2, 5 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 3 ; HSA-NEXT: v_mov_b32_e32 v1, 4 ; HSA-NEXT: v_mov_b32_e32 v2, 5 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3i32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 3 ; GISEL-NEXT: v_mov_b32_e32 v1, 4 ; GISEL-NEXT: v_mov_b32_e32 v2, 5 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>) ret void } define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_i32: ; VI: ; 
%bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 ; VI-NEXT: v_mov_b32_e32 v3, 6 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i32_i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 ; CI-NEXT: v_mov_b32_e32 v3, 6 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v3i32_i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, 
external_void_func_v3i32_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 3 ; SDAG-NEXT: v_mov_b32_e32 v1, 4 ; SDAG-NEXT: v_mov_b32_e32 v2, 5 ; SDAG-NEXT: v_mov_b32_e32 v3, 6 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v3i32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v3i32_i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 3 ; HSA-NEXT: v_mov_b32_e32 v1, 4 ; HSA-NEXT: v_mov_b32_e32 v2, 5 ; HSA-NEXT: v_mov_b32_e32 v3, 6 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v3i32_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: 
s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 3 ; GISEL-NEXT: v_mov_b32_e32 v1, 4 ; GISEL-NEXT: v_mov_b32_e32 v2, 5 ; GISEL-NEXT: v_mov_b32_e32 v3, 6 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6) ret void } define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; VI-LABEL: test_call_external_void_func_v4i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; CI-NEXT: 
s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v4i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v4i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], 
s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v4i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = load <4 x i32>, ptr addrspace(1) poison call void @external_void_func_v4i32(<4 x i32> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 ; VI-NEXT: v_mov_b32_e32 v3, 4 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; 
CI-LABEL: test_call_external_void_func_v4i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: v_mov_b32_e32 v3, 4 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v4i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1 ; SDAG-NEXT: v_mov_b32_e32 v1, 2 ; SDAG-NEXT: v_mov_b32_e32 v2, 3 ; SDAG-NEXT: v_mov_b32_e32 v3, 4 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v4i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], 
s[0:1]
; GFX11-NEXT:    s_mov_b32 s32, 0
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_endpgm
;
; HSA-LABEL: test_call_external_void_func_v4i32_imm:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_add_i32 s6, s6, s9
; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
; HSA-NEXT:    s_add_u32 s0, s0, s9
; HSA-NEXT:    s_addc_u32 s1, s1, 0
; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
; HSA-NEXT:    s_getpc_b64 s[8:9]
; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
; HSA-NEXT:    v_mov_b32_e32 v0, 1
; HSA-NEXT:    v_mov_b32_e32 v1, 2
; HSA-NEXT:    v_mov_b32_e32 v2, 3
; HSA-NEXT:    v_mov_b32_e32 v3, 4
; HSA-NEXT:    s_mov_b32 s32, 0
; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT:    s_endpgm
;
; GISEL-LABEL: test_call_external_void_func_v4i32_imm:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GISEL-NEXT:    s_mov_b32 s38, -1
; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
; GISEL-NEXT:    s_add_u32 s36, s36, s3
; GISEL-NEXT:    s_addc_u32 s37, s37, 0
; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
; GISEL-NEXT:    s_getpc_b64 s[4:5]
; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
; GISEL-NEXT:    v_mov_b32_e32 v0, 1
; GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GISEL-NEXT:    v_mov_b32_e32 v2, 3
; GISEL-NEXT:    v_mov_b32_e32 v3, 4
; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
; GISEL-NEXT:    s_mov_b32 s32, 0
; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT:    s_endpgm
  ; Constant <4 x i32> argument; the checks above expect lanes 1..4 to be
  ; materialized into v0-v3 before the call.
  call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v5i32_imm:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; VI-NEXT:    s_mov_b32 s38, -1
; VI-NEXT:    s_mov_b32 s39, 0xe80000
; VI-NEXT:
s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 ; VI-NEXT: v_mov_b32_e32 v3, 4 ; VI-NEXT: v_mov_b32_e32 v4, 5 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v5i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: v_mov_b32_e32 v3, 4 ; CI-NEXT: v_mov_b32_e32 v4, 5 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v5i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: 
v_mov_b32_e32 v0, 1 ; SDAG-NEXT: v_mov_b32_e32 v1, 2 ; SDAG-NEXT: v_mov_b32_e32 v2, 3 ; SDAG-NEXT: v_mov_b32_e32 v3, 4 ; SDAG-NEXT: v_mov_b32_e32 v4, 5 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v5i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_mov_b32_e32 v4, 5 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v5i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 1 ; HSA-NEXT: v_mov_b32_e32 v1, 2 ; HSA-NEXT: v_mov_b32_e32 v2, 3 ; HSA-NEXT: v_mov_b32_e32 v3, 4 ; HSA-NEXT: v_mov_b32_e32 v4, 5 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v5i32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; 
GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
; GISEL-NEXT:    v_mov_b32_e32 v0, 1
; GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GISEL-NEXT:    v_mov_b32_e32 v2, 3
; GISEL-NEXT:    v_mov_b32_e32 v3, 4
; GISEL-NEXT:    v_mov_b32_e32 v4, 5
; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
; GISEL-NEXT:    s_mov_b32 s32, 0
; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT:    s_endpgm
  ; Constant <5 x i32> argument; the checks above expect lanes 1..5 to be
  ; materialized into v0-v4 before the call.
  call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; VI-LABEL: test_call_external_void_func_v8i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; VI-NEXT:    s_mov_b32 s38, -1
; VI-NEXT:    s_mov_b32 s39, 0xe80000
; VI-NEXT:    s_add_u32 s36, s36, s3
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT:    s_addc_u32 s37, s37, 0
; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
; VI-NEXT:    s_getpc_b64 s[4:5]
; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
; VI-NEXT:    s_mov_b32 s32, 0
; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT:    s_endpgm
;
; CI-LABEL: test_call_external_void_func_v8i32:
; CI:       ; %bb.0:
; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; CI-NEXT:    s_mov_b32 s38, -1
; CI-NEXT:    s_mov_b32 s39, 0xe8f000
; CI-NEXT:    s_add_u32 s36, s36, s3
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; CI-NEXT:    s_addc_u32 s37, s37, 0
;
CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v8i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v8i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: 
test_call_external_void_func_v8i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v8i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 ; GISEL-NEXT: v_mov_b32_e32 v2, s10 ; GISEL-NEXT: v_mov_b32_e32 v3, s11 ; GISEL-NEXT: v_mov_b32_e32 v4, s12 ; GISEL-NEXT: v_mov_b32_e32 v5, s13 ; GISEL-NEXT: v_mov_b32_e32 v6, s14 ; GISEL-NEXT: v_mov_b32_e32 v7, s15 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr 
addrspace(4) poison %val = load <8 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v8i32(<8 x i32> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v8i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 ; VI-NEXT: v_mov_b32_e32 v3, 4 ; VI-NEXT: v_mov_b32_e32 v4, 5 ; VI-NEXT: v_mov_b32_e32 v5, 6 ; VI-NEXT: v_mov_b32_e32 v6, 7 ; VI-NEXT: v_mov_b32_e32 v7, 8 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v8i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 ; CI-NEXT: v_mov_b32_e32 v3, 4 ; CI-NEXT: v_mov_b32_e32 v4, 5 ; CI-NEXT: v_mov_b32_e32 v5, 6 ; CI-NEXT: v_mov_b32_e32 v6, 7 ; CI-NEXT: v_mov_b32_e32 v7, 8 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: 
s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v8i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: v_mov_b32_e32 v0, 1 ; SDAG-NEXT: v_mov_b32_e32 v1, 2 ; SDAG-NEXT: v_mov_b32_e32 v2, 3 ; SDAG-NEXT: v_mov_b32_e32 v3, 4 ; SDAG-NEXT: v_mov_b32_e32 v4, 5 ; SDAG-NEXT: v_mov_b32_e32 v5, 6 ; SDAG-NEXT: v_mov_b32_e32 v6, 7 ; SDAG-NEXT: v_mov_b32_e32 v7, 8 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v8i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v8i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, 
external_void_func_v8i32@rel32@hi+12
; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
; HSA-NEXT:    v_mov_b32_e32 v0, 1
; HSA-NEXT:    v_mov_b32_e32 v1, 2
; HSA-NEXT:    v_mov_b32_e32 v2, 3
; HSA-NEXT:    v_mov_b32_e32 v3, 4
; HSA-NEXT:    v_mov_b32_e32 v4, 5
; HSA-NEXT:    v_mov_b32_e32 v5, 6
; HSA-NEXT:    v_mov_b32_e32 v6, 7
; HSA-NEXT:    v_mov_b32_e32 v7, 8
; HSA-NEXT:    s_mov_b32 s32, 0
; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT:    s_endpgm
;
; GISEL-LABEL: test_call_external_void_func_v8i32_imm:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GISEL-NEXT:    s_mov_b32 s38, -1
; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
; GISEL-NEXT:    s_add_u32 s36, s36, s3
; GISEL-NEXT:    s_addc_u32 s37, s37, 0
; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
; GISEL-NEXT:    s_getpc_b64 s[4:5]
; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
; GISEL-NEXT:    v_mov_b32_e32 v0, 1
; GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GISEL-NEXT:    v_mov_b32_e32 v2, 3
; GISEL-NEXT:    v_mov_b32_e32 v3, 4
; GISEL-NEXT:    v_mov_b32_e32 v4, 5
; GISEL-NEXT:    v_mov_b32_e32 v5, 6
; GISEL-NEXT:    v_mov_b32_e32 v6, 7
; GISEL-NEXT:    v_mov_b32_e32 v7, 8
; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
; GISEL-NEXT:    s_mov_b32 s32, 0
; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT:    s_endpgm
  ; Constant <8 x i32> argument; the checks above expect lanes 1..8 to be
  ; materialized into v0-v7 before the call.
  call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; VI-LABEL: test_call_external_void_func_v16i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; VI-NEXT:    s_mov_b32 s38, -1
; VI-NEXT:    s_mov_b32 s39, 0xe80000
; VI-NEXT:    s_add_u32 s36, s36, s3
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dwordx4 v[0:3],
off, s[0:3], 0 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v16i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v16i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 
0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v16i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 
; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v16i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 ; GISEL-NEXT: v_mov_b32_e32 v2, s10 ; GISEL-NEXT: v_mov_b32_e32 v3, s11 ; GISEL-NEXT: v_mov_b32_e32 v4, s12 ; GISEL-NEXT: v_mov_b32_e32 v5, s13 ; GISEL-NEXT: v_mov_b32_e32 v6, s14 ; GISEL-NEXT: v_mov_b32_e32 v7, s15 ; GISEL-NEXT: v_mov_b32_e32 v8, s16 ; GISEL-NEXT: v_mov_b32_e32 v9, s17 ; GISEL-NEXT: v_mov_b32_e32 v10, s18 ; GISEL-NEXT: v_mov_b32_e32 v11, s19 ; GISEL-NEXT: v_mov_b32_e32 v12, s20 ; GISEL-NEXT: v_mov_b32_e32 v13, s21 ; GISEL-NEXT: v_mov_b32_e32 v14, s22 ; GISEL-NEXT: v_mov_b32_e32 v15, s23 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <16 
x i32>, ptr addrspace(1) %ptr call void @external_void_func_v16i32(<16 x i32> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; VI-LABEL: test_call_external_void_func_v32i32: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[8:9] ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(6) ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v32i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; CI-NEXT: 
buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[8:9] ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(6) ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v32i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SDAG-NEXT: s_mov_b32 s6, -1 ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 ; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 
s[0:1], s[36:37] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_getpc_b64 s[8:9] ; SDAG-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_waitcnt vmcnt(6) ; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v32i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(7) ; GFX11-NEXT: scratch_store_b32 off, v31, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v32i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 ; 
HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[12:13] ; HSA-NEXT: s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_waitcnt vmcnt(7) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v32i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s54, -1 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s55, 0xe00000 ; GISEL-NEXT: s_add_u32 s52, s52, s3 ; GISEL-NEXT: s_addc_u32 s53, s53, 0 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s23 ; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12 ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; GISEL-NEXT: v_mov_b32_e32 v0, s36 ; GISEL-NEXT: v_mov_b32_e32 v1, s37 ; GISEL-NEXT: v_mov_b32_e32 v2, s38 ; GISEL-NEXT: v_mov_b32_e32 v3, s39 ; GISEL-NEXT: v_mov_b32_e32 v4, s40 ; GISEL-NEXT: 
v_mov_b32_e32 v5, s41 ; GISEL-NEXT: v_mov_b32_e32 v6, s42 ; GISEL-NEXT: v_mov_b32_e32 v7, s43 ; GISEL-NEXT: v_mov_b32_e32 v8, s44 ; GISEL-NEXT: v_mov_b32_e32 v9, s45 ; GISEL-NEXT: v_mov_b32_e32 v10, s46 ; GISEL-NEXT: v_mov_b32_e32 v11, s47 ; GISEL-NEXT: v_mov_b32_e32 v12, s48 ; GISEL-NEXT: v_mov_b32_e32 v13, s49 ; GISEL-NEXT: v_mov_b32_e32 v14, s50 ; GISEL-NEXT: v_mov_b32_e32 v15, s51 ; GISEL-NEXT: v_mov_b32_e32 v16, s8 ; GISEL-NEXT: v_mov_b32_e32 v17, s9 ; GISEL-NEXT: v_mov_b32_e32 v18, s10 ; GISEL-NEXT: v_mov_b32_e32 v19, s11 ; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55] ; GISEL-NEXT: v_mov_b32_e32 v20, s12 ; GISEL-NEXT: v_mov_b32_e32 v21, s13 ; GISEL-NEXT: v_mov_b32_e32 v22, s14 ; GISEL-NEXT: v_mov_b32_e32 v23, s15 ; GISEL-NEXT: v_mov_b32_e32 v24, s16 ; GISEL-NEXT: v_mov_b32_e32 v25, s17 ; GISEL-NEXT: v_mov_b32_e32 v26, s18 ; GISEL-NEXT: v_mov_b32_e32 v27, s19 ; GISEL-NEXT: v_mov_b32_e32 v28, s20 ; GISEL-NEXT: v_mov_b32_e32 v29, s21 ; GISEL-NEXT: v_mov_b32_e32 v30, s22 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <32 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v32i32(<32 x i32> %val) ret void } define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v32i32_i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, 
s[4:7], 0 offset:32 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(8) ; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(8) ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v32i32_i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, 
external_void_func_v32i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(8) ; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(8) ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v32i32_i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SDAG-NEXT: s_mov_b32 s6, -1 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 ; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 ; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_waitcnt vmcnt(8) ; SDAG-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; SDAG-NEXT: s_waitcnt vmcnt(8) ; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: 
s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v32i32_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32_i32@rel32@hi+12 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x8 ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112 ; GFX11-NEXT: buffer_load_b32 v32, off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_add_i32 s4, s32, 4 ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: scratch_store_b32 off, v31, s32 ; GFX11-NEXT: s_waitcnt vmcnt(7) ; GFX11-NEXT: scratch_store_b32 off, v32, s4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v32i32_i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_dword v32, off, s[8:11], 0 ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, 
s[8:11], 0 offset:32 ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v32i32_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_waitcnt vmcnt(8) ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; HSA-NEXT: s_waitcnt vmcnt(8) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v32i32_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s54, -1 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0 ; GISEL-NEXT: s_load_dword s2, s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s55, 0xe00000 ; GISEL-NEXT: s_add_u32 s52, s52, s5 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_addc_u32 s53, s53, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1 ; GISEL-NEXT: ; kill: killed $sgpr0_sgpr1 ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; GISEL-NEXT: v_mov_b32_e32 v0, s23 ; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; GISEL-NEXT: v_mov_b32_e32 v0, s36 ; 
GISEL-NEXT: v_mov_b32_e32 v1, s37 ; GISEL-NEXT: v_mov_b32_e32 v2, s38 ; GISEL-NEXT: v_mov_b32_e32 v3, s39 ; GISEL-NEXT: v_mov_b32_e32 v4, s40 ; GISEL-NEXT: v_mov_b32_e32 v5, s41 ; GISEL-NEXT: v_mov_b32_e32 v6, s42 ; GISEL-NEXT: v_mov_b32_e32 v7, s43 ; GISEL-NEXT: v_mov_b32_e32 v8, s44 ; GISEL-NEXT: v_mov_b32_e32 v9, s45 ; GISEL-NEXT: v_mov_b32_e32 v10, s46 ; GISEL-NEXT: v_mov_b32_e32 v11, s47 ; GISEL-NEXT: v_mov_b32_e32 v12, s48 ; GISEL-NEXT: v_mov_b32_e32 v13, s49 ; GISEL-NEXT: v_mov_b32_e32 v14, s50 ; GISEL-NEXT: v_mov_b32_e32 v15, s51 ; GISEL-NEXT: v_mov_b32_e32 v16, s8 ; GISEL-NEXT: v_mov_b32_e32 v17, s9 ; GISEL-NEXT: v_mov_b32_e32 v18, s10 ; GISEL-NEXT: v_mov_b32_e32 v19, s11 ; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55] ; GISEL-NEXT: v_mov_b32_e32 v20, s12 ; GISEL-NEXT: v_mov_b32_e32 v21, s13 ; GISEL-NEXT: v_mov_b32_e32 v22, s14 ; GISEL-NEXT: v_mov_b32_e32 v23, s15 ; GISEL-NEXT: v_mov_b32_e32 v24, s16 ; GISEL-NEXT: v_mov_b32_e32 v25, s17 ; GISEL-NEXT: v_mov_b32_e32 v26, s18 ; GISEL-NEXT: v_mov_b32_e32 v27, s19 ; GISEL-NEXT: v_mov_b32_e32 v28, s20 ; GISEL-NEXT: v_mov_b32_e32 v29, s21 ; GISEL-NEXT: v_mov_b32_e32 v30, s22 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 %val1 = load i32, ptr addrspace(1) poison call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) ret void } define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; VI-LABEL: test_call_external_i32_func_i32_imm: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s50, -1 ; VI-NEXT: s_mov_b32 s51, 0xe80000 ; VI-NEXT: s_add_u32 s48, s48, s5 ; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 ; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, 
external_i32_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_mov_b32 s39, 0xf000 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_i32_func_i32_imm: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s50, -1 ; CI-NEXT: s_mov_b32 s51, 0xe8f000 ; CI-NEXT: s_add_u32 s48, s48, s5 ; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9 ; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_mov_b32 s39, 0xf000 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_i32_func_i32_imm: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s50, -1 ; SDAG-NEXT: s_mov_b32 s51, 0xe00000 ; SDAG-NEXT: s_add_u32 s48, s48, s5 ; SDAG-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 ; SDAG-NEXT: s_addc_u32 s49, s49, 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] ; SDAG-NEXT: v_mov_b32_e32 v0, 42 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_mov_b32 s39, 
0xf000 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_i32_func_i32_imm: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_mov_b32 s39, 0x31016000 ; GFX11-NEXT: s_mov_b32 s38, -1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_i32_func_i32_imm: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_i32_func_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_i32_func_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 42 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_mov_b32 s39, 0x1100f000 ; HSA-NEXT: s_mov_b32 s38, -1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_i32_func_i32_imm: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s50, -1 ; GISEL-NEXT: s_mov_b32 s51, 0xe00000 ; GISEL-NEXT: s_add_u32 s48, s48, s5 ; GISEL-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 ; GISEL-NEXT: s_addc_u32 s49, s49, 0 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; 
GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xf000 ; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out ret void } define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_struct_i8_i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_struct_i8_i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 
s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_struct_i8_i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_struct_i8_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; 
GFX11-NEXT: buffer_load_u8 v0, off, s[4:7], 0 ; GFX11-NEXT: buffer_load_b32 v1, off, s[4:7], 0 offset:4 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_struct_i8_i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 ; HSA-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:4 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_struct_i8_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[8:9] ; GISEL-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: 
s_endpgm %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) ret void } define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_movk_i32 s32, 0x400 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; CI-NEXT: buffer_load_dword v1, off, 
s[36:39], 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_movk_i32 s32, 0x400 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: v_mov_b32_e32 v0, 3 ; SDAG-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; SDAG-NEXT: v_mov_b32_e32 v0, 8 ; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_movk_i32 s32, 0x400 ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 ; 
GFX11-TRUE16-NEXT: s_mov_b32 s32, 16 ; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, off ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, off offset:4 ; GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, off ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s32 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, 16 ; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, off ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, off offset:4 ; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, off ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s32 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-FAKE16-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: v_mov_b32_e32 v0, 3 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; HSA-NEXT: v_mov_b32_e32 v0, 8 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; HSA-NEXT: 
buffer_load_dword v1, off, s[0:3], 0 ; HSA-NEXT: s_movk_i32 s32, 0x400 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: v_mov_b32_e32 v0, 3 ; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; GISEL-NEXT: v_mov_b32_e32 v0, 8 ; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_movk_i32 s32, 0x400 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32 ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %val = alloca { i8, i32 }, align 8, addrspace(5) %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0 %gep1 = getelementptr inbounds 
{ i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 store i8 3, ptr addrspace(5) %gep0 store i32 8, ptr addrspace(5) %gep1 call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val) ret void } define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 ; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; VI-NEXT: s_movk_i32 s32, 0x800 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; VI-NEXT: v_mov_b32_e32 v0, 8 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 ; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm ; ; CI-LABEL: 
test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; CI-NEXT: s_movk_i32 s32, 0x800 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 ; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s5 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: v_mov_b32_e32 v0, 3 ; SDAG-NEXT: buffer_store_byte 
v0, off, s[36:39], 0 ; SDAG-NEXT: v_mov_b32_e32 v0, 8 ; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; SDAG-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; SDAG-NEXT: s_movk_i32 s32, 0x800 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; SDAG-NEXT: v_mov_b32_e32 v0, 8 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 ; SDAG-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GFX11-TRUE16-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, 32 ; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, off ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, off offset:4 ; 
GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, off ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s32 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 8 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, off offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, off offset:12 ; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-TRUE16-NEXT: s_nop 0 ; GFX11-TRUE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, 32 ; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3] ; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, off ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, off offset:4 ; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, off ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s32 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 8 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_load_u8 v0, off, off offset:8 ; GFX11-FAKE16-NEXT: scratch_load_b32 v1, off, off offset:12 ; GFX11-FAKE16-NEXT: s_mov_b32 s3, 
0x31016000 ; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-FAKE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FAKE16-NEXT: s_nop 0 ; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-FAKE16-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: v_mov_b32_e32 v0, 3 ; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; HSA-NEXT: v_mov_b32_e32 v0, 8 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 ; HSA-NEXT: s_movk_i32 s32, 0x800 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 8 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8 ; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12 ; HSA-NEXT: s_mov_b32 s7, 0x1100f000 ; HSA-NEXT: s_mov_b32 s6, -1 ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: 
test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s5 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: v_mov_b32_e32 v0, 3 ; GISEL-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; GISEL-NEXT: v_mov_b32_e32 v0, 8 ; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; GISEL-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 ; GISEL-NEXT: s_movk_i32 s32, 0x800 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: buffer_store_dword v0, off, s[36:39], s32 ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: buffer_store_dword v1, off, s[36:39], s32 offset:4 ; GISEL-NEXT: v_mov_b32_e32 v0, 8 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 ; GISEL-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; GISEL-NEXT: s_mov_b32 s2, -1 ; GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %in.val = alloca { i8, i32 }, align 8, addrspace(5) %out.val = alloca { i8, i32 }, align 8, addrspace(5) %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 
1
  store i8 3, ptr addrspace(5) %in.gep0
  store i32 8, ptr addrspace(5) %in.gep1
  ; The callee is declared with sret({ i8, i32 }) on its first parameter, so
  ; the call site must carry the same ABI attribute next to byval. %out.val is
  ; the result slot whose two fields are read back below.
  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
  %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
  %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
  %out.val0 = load i8, ptr addrspace(5) %out.gep0
  %out.val1 = load i32, ptr addrspace(5) %out.gep1
  store volatile i8 %out.val0, ptr addrspace(1) poison
  store volatile i32 %out.val1, ptr addrspace(1) poison
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; VI-LABEL: test_call_external_void_func_v16i8:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_mov_b32 s38, -1
; VI-NEXT: s_mov_b32 s39, 0xe80000
; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
; VI-NEXT: v_lshrrev_b32_e32 v18, 24, v0
; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v2
; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; VI-NEXT: v_lshrrev_b32_e32 v11, 24, v2
; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v3
; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v3
; VI-NEXT: v_lshrrev_b32_e32 v15, 24, v3
; VI-NEXT: v_mov_b32_e32
v4, v1 ; VI-NEXT: v_mov_b32_e32 v8, v2 ; VI-NEXT: v_mov_b32_e32 v12, v3 ; VI-NEXT: v_mov_b32_e32 v1, v16 ; VI-NEXT: v_mov_b32_e32 v2, v17 ; VI-NEXT: v_mov_b32_e32 v3, v18 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v16i8: ; CI: ; %bb.0: ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; CI-NEXT: v_lshrrev_b32_e32 v17, 16, v0 ; CI-NEXT: v_lshrrev_b32_e32 v18, 24, v0 ; CI-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; CI-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; CI-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; CI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; CI-NEXT: v_lshrrev_b32_e32 v11, 24, v2 ; CI-NEXT: v_lshrrev_b32_e32 v13, 8, v3 ; CI-NEXT: v_lshrrev_b32_e32 v14, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v15, 24, v3 ; CI-NEXT: v_mov_b32_e32 v4, v1 ; CI-NEXT: v_mov_b32_e32 v8, v2 ; CI-NEXT: v_mov_b32_e32 v12, v3 ; CI-NEXT: v_mov_b32_e32 v1, v16 ; CI-NEXT: v_mov_b32_e32 v2, v17 ; CI-NEXT: v_mov_b32_e32 v3, v18 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: test_call_external_void_func_v16i8: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s38, -1 ; SDAG-NEXT: s_mov_b32 s39, 0xe00000 ; SDAG-NEXT: s_add_u32 s36, s36, s3 ; SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SDAG-NEXT: s_mov_b32 s2, -1 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; SDAG-NEXT: s_addc_u32 s37, s37, 0 ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; SDAG-NEXT: v_lshrrev_b32_e32 v17, 16, v0 ; SDAG-NEXT: v_lshrrev_b32_e32 v18, 24, v0 ; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; SDAG-NEXT: v_lshrrev_b32_e32 v11, 24, v2 ; SDAG-NEXT: v_lshrrev_b32_e32 v13, 8, v3 ; SDAG-NEXT: v_lshrrev_b32_e32 v14, 16, v3 ; SDAG-NEXT: v_lshrrev_b32_e32 v15, 24, v3 ; SDAG-NEXT: v_mov_b32_e32 v4, v1 ; SDAG-NEXT: v_mov_b32_e32 v8, v2 ; SDAG-NEXT: v_mov_b32_e32 v12, v3 ; SDAG-NEXT: v_mov_b32_e32 v1, v16 ; SDAG-NEXT: v_mov_b32_e32 v2, v17 ; SDAG-NEXT: v_mov_b32_e32 v3, v18 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v16i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i8@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i8@rel32@hi+12 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: 
v_lshrrev_b32_e32 v16, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3 ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16 ; GFX11-NEXT: v_mov_b32_e32 v8, v2 ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18 ; GFX11-NEXT: v_mov_b32_e32 v2, v17 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v16i8: ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 ; HSA-NEXT: s_add_u32 s0, s0, s9 ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_getpc_b64 s[8:9] ; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i8@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i8@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 ; HSA-NEXT: v_lshrrev_b32_e32 v18, 24, v0 ; HSA-NEXT: v_lshrrev_b32_e32 v5, 8, v1 ; HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; HSA-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; HSA-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; HSA-NEXT: v_lshrrev_b32_e32 v11, 24, v2 ; HSA-NEXT: v_lshrrev_b32_e32 v13, 8, v3 ; HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v3 ; HSA-NEXT: v_lshrrev_b32_e32 v15, 24, v3 ; HSA-NEXT: 
v_mov_b32_e32 v4, v1 ; HSA-NEXT: v_mov_b32_e32 v8, v2 ; HSA-NEXT: v_mov_b32_e32 v12, v3 ; HSA-NEXT: v_mov_b32_e32 v1, v16 ; HSA-NEXT: v_mov_b32_e32 v2, v17 ; HSA-NEXT: v_mov_b32_e32 v3, v18 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: test_call_external_void_func_v16i8: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s38, -1 ; GISEL-NEXT: s_mov_b32 s39, 0xe00000 ; GISEL-NEXT: s_add_u32 s36, s36, s3 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_lshr_b32 s8, s0, 8 ; GISEL-NEXT: s_lshr_b32 s9, s0, 16 ; GISEL-NEXT: s_lshr_b32 s10, s0, 24 ; GISEL-NEXT: s_lshr_b32 s11, s1, 8 ; GISEL-NEXT: s_lshr_b32 s12, s1, 16 ; GISEL-NEXT: s_lshr_b32 s13, s1, 24 ; GISEL-NEXT: s_lshr_b32 s14, s2, 8 ; GISEL-NEXT: s_lshr_b32 s15, s2, 16 ; GISEL-NEXT: s_lshr_b32 s16, s2, 24 ; GISEL-NEXT: s_lshr_b32 s17, s3, 8 ; GISEL-NEXT: s_lshr_b32 s18, s3, 16 ; GISEL-NEXT: s_lshr_b32 s19, s3, 24 ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: v_mov_b32_e32 v4, s1 ; GISEL-NEXT: v_mov_b32_e32 v8, s2 ; GISEL-NEXT: v_mov_b32_e32 v12, s3 ; GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 ; GISEL-NEXT: v_mov_b32_e32 v1, s8 ; GISEL-NEXT: v_mov_b32_e32 v2, s9 ; GISEL-NEXT: v_mov_b32_e32 v3, s10 ; GISEL-NEXT: v_mov_b32_e32 v5, s11 ; GISEL-NEXT: v_mov_b32_e32 v6, s12 ; GISEL-NEXT: v_mov_b32_e32 v7, s13 ; GISEL-NEXT: v_mov_b32_e32 v9, s14 ; GISEL-NEXT: v_mov_b32_e32 v10, s15 ; GISEL-NEXT: v_mov_b32_e32 v11, s16 ; GISEL-NEXT: v_mov_b32_e32 v13, s17 ; GISEL-NEXT: v_mov_b32_e32 v14, s18 ; 
GISEL-NEXT: v_mov_b32_e32 v15, s19 ; GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-NEXT: s_endpgm %ptr = load ptr addrspace(1), ptr addrspace(4) poison %val = load <16 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v16i8(<16 x i8> %val) ret void } define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { ; VI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s54, -1 ; VI-NEXT: s_mov_b32 s55, 0xe80000 ; VI-NEXT: s_add_u32 s52, s52, s5 ; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_addc_u32 s53, s53, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s23 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; VI-NEXT: v_mov_b32_e32 v0, s5 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b64 s[0:1], s[52:53] ; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; VI-NEXT: s_mov_b64 s[2:3], s[54:55] ; VI-NEXT: v_mov_b32_e32 v0, s36 ; VI-NEXT: v_mov_b32_e32 v1, s37 ; VI-NEXT: v_mov_b32_e32 v2, s38 ; VI-NEXT: v_mov_b32_e32 v3, s39 ; VI-NEXT: v_mov_b32_e32 v4, s40 ; VI-NEXT: v_mov_b32_e32 v5, s41 ; VI-NEXT: v_mov_b32_e32 v6, s42 ; VI-NEXT: v_mov_b32_e32 v7, s43 ; VI-NEXT: v_mov_b32_e32 v8, s44 ; VI-NEXT: v_mov_b32_e32 v9, s45 ; VI-NEXT: v_mov_b32_e32 v10, s46 ; VI-NEXT: v_mov_b32_e32 v11, s47 ; VI-NEXT: v_mov_b32_e32 v12, s48 ; VI-NEXT: v_mov_b32_e32 v13, s49 ; VI-NEXT: v_mov_b32_e32 v14, s50 ; VI-NEXT: v_mov_b32_e32 v15, s51 ; VI-NEXT: 
v_mov_b32_e32 v16, s8 ; VI-NEXT: v_mov_b32_e32 v17, s9 ; VI-NEXT: v_mov_b32_e32 v18, s10 ; VI-NEXT: v_mov_b32_e32 v19, s11 ; VI-NEXT: v_mov_b32_e32 v20, s12 ; VI-NEXT: v_mov_b32_e32 v21, s13 ; VI-NEXT: v_mov_b32_e32 v22, s14 ; VI-NEXT: v_mov_b32_e32 v23, s15 ; VI-NEXT: v_mov_b32_e32 v24, s16 ; VI-NEXT: v_mov_b32_e32 v25, s17 ; VI-NEXT: v_mov_b32_e32 v26, s18 ; VI-NEXT: v_mov_b32_e32 v27, s19 ; VI-NEXT: v_mov_b32_e32 v28, s20 ; VI-NEXT: v_mov_b32_e32 v29, s21 ; VI-NEXT: v_mov_b32_e32 v30, s22 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s54, -1 ; CI-NEXT: s_mov_b32 s55, 0xe8f000 ; CI-NEXT: s_add_u32 s52, s52, s5 ; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19 ; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29 ; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_addc_u32 s53, s53, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s23 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; CI-NEXT: v_mov_b32_e32 v0, s4 ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, s5 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b64 s[0:1], s[52:53] ; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; CI-NEXT: s_mov_b64 s[2:3], s[54:55] ; CI-NEXT: v_mov_b32_e32 v0, s36 ; CI-NEXT: v_mov_b32_e32 v1, s37 ; CI-NEXT: v_mov_b32_e32 v2, s38 ; CI-NEXT: v_mov_b32_e32 v3, s39 ; CI-NEXT: v_mov_b32_e32 v4, s40 ; CI-NEXT: v_mov_b32_e32 v5, s41 ; CI-NEXT: v_mov_b32_e32 v6, s42 ; CI-NEXT: v_mov_b32_e32 v7, s43 ; CI-NEXT: v_mov_b32_e32 v8, s44 ; CI-NEXT: v_mov_b32_e32 v9, s45 ; CI-NEXT: v_mov_b32_e32 v10, s46 ; CI-NEXT: v_mov_b32_e32 v11, 
s47 ; CI-NEXT: v_mov_b32_e32 v12, s48 ; CI-NEXT: v_mov_b32_e32 v13, s49 ; CI-NEXT: v_mov_b32_e32 v14, s50 ; CI-NEXT: v_mov_b32_e32 v15, s51 ; CI-NEXT: v_mov_b32_e32 v16, s8 ; CI-NEXT: v_mov_b32_e32 v17, s9 ; CI-NEXT: v_mov_b32_e32 v18, s10 ; CI-NEXT: v_mov_b32_e32 v19, s11 ; CI-NEXT: v_mov_b32_e32 v20, s12 ; CI-NEXT: v_mov_b32_e32 v21, s13 ; CI-NEXT: v_mov_b32_e32 v22, s14 ; CI-NEXT: v_mov_b32_e32 v23, s15 ; CI-NEXT: v_mov_b32_e32 v24, s16 ; CI-NEXT: v_mov_b32_e32 v25, s17 ; CI-NEXT: v_mov_b32_e32 v26, s18 ; CI-NEXT: v_mov_b32_e32 v27, s19 ; CI-NEXT: v_mov_b32_e32 v28, s20 ; CI-NEXT: v_mov_b32_e32 v29, s21 ; CI-NEXT: v_mov_b32_e32 v30, s22 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; SDAG-LABEL: stack_passed_arg_alignment_v32i32_f64: ; SDAG: ; %bb.0: ; %entry ; SDAG-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; SDAG-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; SDAG-NEXT: s_mov_b32 s54, -1 ; SDAG-NEXT: s_mov_b32 s55, 0xe00000 ; SDAG-NEXT: s_add_u32 s52, s52, s5 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; SDAG-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; SDAG-NEXT: s_mov_b32 s32, 0 ; SDAG-NEXT: s_addc_u32 s53, s53, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b32_e32 v0, s23 ; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; SDAG-NEXT: v_mov_b32_e32 v0, s4 ; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; SDAG-NEXT: v_mov_b32_e32 v0, s5 ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1] ; SDAG-NEXT: s_mov_b64 s[0:1], s[52:53] ; SDAG-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; SDAG-NEXT: s_mov_b64 s[2:3], s[54:55] ; SDAG-NEXT: v_mov_b32_e32 v0, s36 ; SDAG-NEXT: v_mov_b32_e32 v1, s37 ; SDAG-NEXT: v_mov_b32_e32 v2, s38 ; SDAG-NEXT: v_mov_b32_e32 v3, s39 ; SDAG-NEXT: v_mov_b32_e32 v4, s40 ; SDAG-NEXT: 
v_mov_b32_e32 v5, s41 ; SDAG-NEXT: v_mov_b32_e32 v6, s42 ; SDAG-NEXT: v_mov_b32_e32 v7, s43 ; SDAG-NEXT: v_mov_b32_e32 v8, s44 ; SDAG-NEXT: v_mov_b32_e32 v9, s45 ; SDAG-NEXT: v_mov_b32_e32 v10, s46 ; SDAG-NEXT: v_mov_b32_e32 v11, s47 ; SDAG-NEXT: v_mov_b32_e32 v12, s48 ; SDAG-NEXT: v_mov_b32_e32 v13, s49 ; SDAG-NEXT: v_mov_b32_e32 v14, s50 ; SDAG-NEXT: v_mov_b32_e32 v15, s51 ; SDAG-NEXT: v_mov_b32_e32 v16, s8 ; SDAG-NEXT: v_mov_b32_e32 v17, s9 ; SDAG-NEXT: v_mov_b32_e32 v18, s10 ; SDAG-NEXT: v_mov_b32_e32 v19, s11 ; SDAG-NEXT: v_mov_b32_e32 v20, s12 ; SDAG-NEXT: v_mov_b32_e32 v21, s13 ; SDAG-NEXT: v_mov_b32_e32 v22, s14 ; SDAG-NEXT: v_mov_b32_e32 v23, s15 ; SDAG-NEXT: v_mov_b32_e32 v24, s16 ; SDAG-NEXT: v_mov_b32_e32 v25, s17 ; SDAG-NEXT: v_mov_b32_e32 v26, s18 ; SDAG-NEXT: v_mov_b32_e32 v27, s19 ; SDAG-NEXT: v_mov_b32_e32 v28, s20 ; SDAG-NEXT: v_mov_b32_e32 v29, s21 ; SDAG-NEXT: v_mov_b32_e32 v30, s22 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x2 ; GFX11-NEXT: s_load_b64 s[20:21], s[2:3], 0xa4 ; GFX11-NEXT: s_load_b512 s[4:19], s[2:3], 0x64 ; GFX11-NEXT: s_load_b512 s[36:51], s[2:3], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s22, s32, 8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s21 :: v_dual_mov_b32 v1, s20 ; GFX11-NEXT: v_mov_b32_e32 v2, s19 ; GFX11-NEXT: s_add_i32 s19, s32, 4 ; GFX11-NEXT: v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s43 ; GFX11-NEXT: scratch_store_b32 off, v0, s22 ; GFX11-NEXT: scratch_store_b32 off, v1, s19 ; GFX11-NEXT: scratch_store_b32 off, v2, s32 ; GFX11-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39 ; GFX11-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s38 ; GFX11-NEXT: v_dual_mov_b32 v5, s41 :: v_dual_mov_b32 v6, s42 ; GFX11-NEXT: v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44 ; GFX11-NEXT: v_dual_mov_b32 
v11, s47 :: v_dual_mov_b32 v10, s46 ; GFX11-NEXT: v_dual_mov_b32 v13, s49 :: v_dual_mov_b32 v12, s48 ; GFX11-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v14, s50 ; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4 ; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6 ; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8 ; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10 ; GFX11-NEXT: v_dual_mov_b32 v25, s13 :: v_dual_mov_b32 v24, s12 ; GFX11-NEXT: v_dual_mov_b32 v27, s15 :: v_dual_mov_b32 v26, s14 ; GFX11-NEXT: v_dual_mov_b32 v29, s17 :: v_dual_mov_b32 v28, s16 ; GFX11-NEXT: v_mov_b32_e32 v30, s18 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, stack_passed_f64_arg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, stack_passed_f64_arg@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: stack_passed_arg_alignment_v32i32_f64: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_load_dwordx16 s[8:23], s[6:7], 0x40 ; HSA-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x80 ; HSA-NEXT: s_load_dwordx16 s[36:51], s[6:7], 0x0 ; HSA-NEXT: s_mov_b32 s32, 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_waitcnt lgkmcnt(0) ; HSA-NEXT: v_mov_b32_e32 v0, s23 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, s24 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: v_mov_b32_e32 v0, s25 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: s_getpc_b64 s[24:25] ; HSA-NEXT: s_add_u32 s24, s24, stack_passed_f64_arg@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s25, s25, stack_passed_f64_arg@rel32@hi+12 ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, s36 ; HSA-NEXT: v_mov_b32_e32 v1, s37 ; HSA-NEXT: v_mov_b32_e32 v2, s38 ; HSA-NEXT: 
v_mov_b32_e32 v3, s39 ; HSA-NEXT: v_mov_b32_e32 v4, s40 ; HSA-NEXT: v_mov_b32_e32 v5, s41 ; HSA-NEXT: v_mov_b32_e32 v6, s42 ; HSA-NEXT: v_mov_b32_e32 v7, s43 ; HSA-NEXT: v_mov_b32_e32 v8, s44 ; HSA-NEXT: v_mov_b32_e32 v9, s45 ; HSA-NEXT: v_mov_b32_e32 v10, s46 ; HSA-NEXT: v_mov_b32_e32 v11, s47 ; HSA-NEXT: v_mov_b32_e32 v12, s48 ; HSA-NEXT: v_mov_b32_e32 v13, s49 ; HSA-NEXT: v_mov_b32_e32 v14, s50 ; HSA-NEXT: v_mov_b32_e32 v15, s51 ; HSA-NEXT: v_mov_b32_e32 v16, s8 ; HSA-NEXT: v_mov_b32_e32 v17, s9 ; HSA-NEXT: v_mov_b32_e32 v18, s10 ; HSA-NEXT: v_mov_b32_e32 v19, s11 ; HSA-NEXT: v_mov_b32_e32 v20, s12 ; HSA-NEXT: v_mov_b32_e32 v21, s13 ; HSA-NEXT: v_mov_b32_e32 v22, s14 ; HSA-NEXT: v_mov_b32_e32 v23, s15 ; HSA-NEXT: v_mov_b32_e32 v24, s16 ; HSA-NEXT: v_mov_b32_e32 v25, s17 ; HSA-NEXT: v_mov_b32_e32 v26, s18 ; HSA-NEXT: v_mov_b32_e32 v27, s19 ; HSA-NEXT: v_mov_b32_e32 v28, s20 ; HSA-NEXT: v_mov_b32_e32 v29, s21 ; HSA-NEXT: v_mov_b32_e32 v30, s22 ; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25] ; HSA-NEXT: s_endpgm ; ; GISEL-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 ; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 ; GISEL-NEXT: s_mov_b32 s54, -1 ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1] ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xa4 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; GISEL-NEXT: s_mov_b32 s55, 0xe00000 ; GISEL-NEXT: s_add_u32 s52, s52, s5 ; GISEL-NEXT: s_mov_b32 s32, 0 ; GISEL-NEXT: s_addc_u32 s53, s53, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b32_e32 v0, s23 ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; GISEL-NEXT: v_mov_b32_e32 v0, s1 ; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53] ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 
s4, s4, stack_passed_f64_arg@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
; GISEL-NEXT: v_mov_b32_e32 v0, s36
; GISEL-NEXT: v_mov_b32_e32 v1, s37
; GISEL-NEXT: v_mov_b32_e32 v2, s38
; GISEL-NEXT: v_mov_b32_e32 v3, s39
; GISEL-NEXT: v_mov_b32_e32 v4, s40
; GISEL-NEXT: v_mov_b32_e32 v5, s41
; GISEL-NEXT: v_mov_b32_e32 v6, s42
; GISEL-NEXT: v_mov_b32_e32 v7, s43
; GISEL-NEXT: v_mov_b32_e32 v8, s44
; GISEL-NEXT: v_mov_b32_e32 v9, s45
; GISEL-NEXT: v_mov_b32_e32 v10, s46
; GISEL-NEXT: v_mov_b32_e32 v11, s47
; GISEL-NEXT: v_mov_b32_e32 v12, s48
; GISEL-NEXT: v_mov_b32_e32 v13, s49
; GISEL-NEXT: v_mov_b32_e32 v14, s50
; GISEL-NEXT: v_mov_b32_e32 v15, s51
; GISEL-NEXT: v_mov_b32_e32 v16, s8
; GISEL-NEXT: v_mov_b32_e32 v17, s9
; GISEL-NEXT: v_mov_b32_e32 v18, s10
; GISEL-NEXT: v_mov_b32_e32 v19, s11
; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55]
; GISEL-NEXT: v_mov_b32_e32 v20, s12
; GISEL-NEXT: v_mov_b32_e32 v21, s13
; GISEL-NEXT: v_mov_b32_e32 v22, s14
; GISEL-NEXT: v_mov_b32_e32 v23, s15
; GISEL-NEXT: v_mov_b32_e32 v24, s16
; GISEL-NEXT: v_mov_b32_e32 v25, s17
; GISEL-NEXT: v_mov_b32_e32 v26, s18
; GISEL-NEXT: v_mov_b32_e32 v27, s19
; GISEL-NEXT: v_mov_b32_e32 v28, s20
; GISEL-NEXT: v_mov_b32_e32 v29, s21
; GISEL-NEXT: v_mov_b32_e32 v30, s22
; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-NEXT: s_endpgm
entry:
  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
  ret void
}

; Tail call that forwards %val and passes a local `alloca double` as the
; `byval(double) align 16` stack argument of @byval_align16_f64_arg; %tmp is
; not used by the body. Every expected sequence for this function ends in
; s_setpc_b64 on the computed callee address rather than s_swappc_b64, i.e.
; the call is emitted as a jump with no return to this function.
define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; VI-LABEL: tail_call_byval_align16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20
; VI-NEXT:
buffer_load_dword v31, off, s[0:3], s32 offset:24 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; VI-NEXT: s_setpc_b64 s[4:5] ; ; CI-LABEL: tail_call_byval_align16: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 ; CI-NEXT: s_waitcnt vmcnt(2) ; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; CI-NEXT: s_setpc_b64 s[4:5] ; ; SDAG-LABEL: tail_call_byval_align16: ; SDAG: ; %bb.0: ; %entry ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 ; SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; SDAG-NEXT: s_getpc_b64 s[4:5] ; SDAG-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 ; SDAG-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 ; SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 ; SDAG-NEXT: s_waitcnt vmcnt(2) ; SDAG-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; SDAG-NEXT: s_waitcnt vmcnt(1) ; SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; SDAG-NEXT: s_setpc_b64 s[4:5] ; ; GFX11-LABEL: tail_call_byval_align16: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, 
byval_align16_f64_arg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b32 off, v31, s32 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:24 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:16 ; GFX11-NEXT: s_setpc_b64 s[0:1] ; ; HSA-LABEL: tail_call_byval_align16: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 ; HSA-NEXT: s_waitcnt vmcnt(2) ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; HSA-NEXT: s_waitcnt vmcnt(1) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: tail_call_byval_align16: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GISEL-NEXT: s_getpc_b64 s[4:5] ; GISEL-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 ; GISEL-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 ; GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 ; GISEL-NEXT: s_setpc_b64 s[4:5] entry: %alloca = alloca double, align 8, addrspace(5) tail call void 
@byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
  ret void
}

; Tail call that forwards the incoming <32 x i32> %val and double %tmp
; unchanged to @stack_passed_f64_arg. For every prefix the expected code
; reloads the incoming stack data at s32, s32+4 and s32+8 and stores it back
; at the same offsets before jumping to the callee with s_setpc_b64.
define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
; VI-NEXT: s_setpc_b64 s[4:5]
;
; CI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
; CI: ; %bb.0: ; %entry
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
; CI-NEXT: s_waitcnt vmcnt(2)
; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32
; CI-NEXT: s_waitcnt vmcnt(2)
; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(2)
; CI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8
; CI-NEXT: s_setpc_b64 s[4:5]
;
; GFX9-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword
v33, off, s[0:3], s32 offset:8 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; GFX9-NEXT: s_setpc_b64 s[4:5] ; ; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 ; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b32 off, v33, s32 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:4 ; GFX11-NEXT: s_setpc_b64 s[0:1] ; ; HSA-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; HSA-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; HSA-NEXT: s_waitcnt vmcnt(2) ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 ; HSA-NEXT: s_waitcnt vmcnt(2) ; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; HSA-NEXT: s_waitcnt vmcnt(2) ; HSA-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; HSA-NEXT: s_setpc_b64 s[4:5] entry: tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) ret 
void } define void @stack_12xv3i32() #0 { ; VI-LABEL: stack_12xv3i32: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: s_mov_b32 s4, s33 ; VI-NEXT: s_mov_b32 s33, s32 ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] ; VI-NEXT: s_addk_i32 s32, 0x400 ; VI-NEXT: v_mov_b32_e32 v0, 11 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 12 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; VI-NEXT: v_mov_b32_e32 v0, 13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; VI-NEXT: v_mov_b32_e32 v0, 14 ; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 15 ; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 1 ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 1 ; VI-NEXT: v_mov_b32_e32 v6, 2 ; VI-NEXT: v_mov_b32_e32 v7, 2 ; VI-NEXT: v_mov_b32_e32 v8, 2 ; VI-NEXT: v_mov_b32_e32 v9, 3 ; VI-NEXT: v_mov_b32_e32 v10, 3 ; VI-NEXT: v_mov_b32_e32 v11, 3 ; VI-NEXT: v_mov_b32_e32 v12, 4 ; VI-NEXT: v_mov_b32_e32 v13, 4 ; VI-NEXT: v_mov_b32_e32 v14, 4 ; VI-NEXT: v_mov_b32_e32 v15, 5 ; VI-NEXT: v_mov_b32_e32 v16, 5 ; VI-NEXT: v_mov_b32_e32 v17, 5 ; VI-NEXT: v_mov_b32_e32 v18, 6 ; VI-NEXT: v_mov_b32_e32 v19, 6 ; VI-NEXT: v_mov_b32_e32 v20, 6 ; VI-NEXT: v_mov_b32_e32 v21, 7 ; VI-NEXT: v_mov_b32_e32 v22, 7 ; VI-NEXT: v_mov_b32_e32 v23, 7 ; VI-NEXT: v_mov_b32_e32 v24, 8 ; VI-NEXT: v_mov_b32_e32 v25, 8 ; VI-NEXT: v_mov_b32_e32 v26, 8 ; VI-NEXT: v_mov_b32_e32 v27, 9 ; VI-NEXT: v_mov_b32_e32 v28, 
9 ; VI-NEXT: v_mov_b32_e32 v29, 9 ; VI-NEXT: v_mov_b32_e32 v30, 10 ; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[6:7] ; VI-NEXT: s_mov_b32 s33, s4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; CI-LABEL: stack_12xv3i32: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: s_mov_b32 s4, s33 ; CI-NEXT: s_mov_b32 s33, s32 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 11 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 12 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 14 ; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 15 ; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 1 ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 1 ; CI-NEXT: v_mov_b32_e32 v6, 2 ; CI-NEXT: v_mov_b32_e32 v7, 2 ; CI-NEXT: v_mov_b32_e32 v8, 2 ; CI-NEXT: v_mov_b32_e32 v9, 3 ; CI-NEXT: v_mov_b32_e32 v10, 3 ; CI-NEXT: v_mov_b32_e32 v11, 3 ; CI-NEXT: v_mov_b32_e32 v12, 4 ; CI-NEXT: 
v_mov_b32_e32 v13, 4 ; CI-NEXT: v_mov_b32_e32 v14, 4 ; CI-NEXT: v_mov_b32_e32 v15, 5 ; CI-NEXT: v_mov_b32_e32 v16, 5 ; CI-NEXT: v_mov_b32_e32 v17, 5 ; CI-NEXT: v_mov_b32_e32 v18, 6 ; CI-NEXT: v_mov_b32_e32 v19, 6 ; CI-NEXT: v_mov_b32_e32 v20, 6 ; CI-NEXT: v_mov_b32_e32 v21, 7 ; CI-NEXT: v_mov_b32_e32 v22, 7 ; CI-NEXT: v_mov_b32_e32 v23, 7 ; CI-NEXT: v_mov_b32_e32 v24, 8 ; CI-NEXT: v_mov_b32_e32 v25, 8 ; CI-NEXT: v_mov_b32_e32 v26, 8 ; CI-NEXT: v_mov_b32_e32 v27, 9 ; CI-NEXT: v_mov_b32_e32 v28, 9 ; CI-NEXT: v_mov_b32_e32 v29, 9 ; CI-NEXT: v_mov_b32_e32 v30, 10 ; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CI-NEXT: s_mov_b64 exec, s[6:7] ; CI-NEXT: s_mov_b32 s33, s4 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: stack_12xv3i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 11 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 
s4, s4, external_void_func_12xv3i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 1 ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 1 ; GFX9-NEXT: v_mov_b32_e32 v6, 2 ; GFX9-NEXT: v_mov_b32_e32 v7, 2 ; GFX9-NEXT: v_mov_b32_e32 v8, 2 ; GFX9-NEXT: v_mov_b32_e32 v9, 3 ; GFX9-NEXT: v_mov_b32_e32 v10, 3 ; GFX9-NEXT: v_mov_b32_e32 v11, 3 ; GFX9-NEXT: v_mov_b32_e32 v12, 4 ; GFX9-NEXT: v_mov_b32_e32 v13, 4 ; GFX9-NEXT: v_mov_b32_e32 v14, 4 ; GFX9-NEXT: v_mov_b32_e32 v15, 5 ; GFX9-NEXT: v_mov_b32_e32 v16, 5 ; GFX9-NEXT: v_mov_b32_e32 v17, 5 ; GFX9-NEXT: v_mov_b32_e32 v18, 6 ; GFX9-NEXT: v_mov_b32_e32 v19, 6 ; GFX9-NEXT: v_mov_b32_e32 v20, 6 ; GFX9-NEXT: v_mov_b32_e32 v21, 7 ; GFX9-NEXT: v_mov_b32_e32 v22, 7 ; GFX9-NEXT: v_mov_b32_e32 v23, 7 ; GFX9-NEXT: v_mov_b32_e32 v24, 8 ; GFX9-NEXT: v_mov_b32_e32 v25, 8 ; GFX9-NEXT: v_mov_b32_e32 v26, 8 ; GFX9-NEXT: v_mov_b32_e32 v27, 9 ; GFX9-NEXT: v_mov_b32_e32 v28, 9 ; GFX9-NEXT: v_mov_b32_e32 v29, 9 ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: stack_12xv3i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12 ; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14 ; GFX11-NEXT: v_mov_b32_e32 v4, 15 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 1 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 1 :: v_dual_mov_b32 v4, 1 ; GFX11-NEXT: v_dual_mov_b32 v7, 2 :: v_dual_mov_b32 v6, 2 ; GFX11-NEXT: v_dual_mov_b32 v9, 3 :: v_dual_mov_b32 v8, 2 ; GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_mov_b32 v10, 3 ; GFX11-NEXT: v_dual_mov_b32 v13, 4 :: v_dual_mov_b32 v12, 4 ; GFX11-NEXT: v_dual_mov_b32 v15, 5 :: v_dual_mov_b32 v14, 4 ; GFX11-NEXT: v_dual_mov_b32 v17, 5 :: v_dual_mov_b32 v16, 5 ; GFX11-NEXT: v_dual_mov_b32 v19, 6 :: v_dual_mov_b32 v18, 6 ; GFX11-NEXT: v_dual_mov_b32 v21, 7 :: v_dual_mov_b32 v20, 6 ; GFX11-NEXT: v_dual_mov_b32 v23, 7 :: v_dual_mov_b32 v22, 7 ; GFX11-NEXT: v_dual_mov_b32 v25, 8 :: v_dual_mov_b32 v24, 8 ; GFX11-NEXT: v_dual_mov_b32 v27, 9 :: v_dual_mov_b32 v26, 8 ; GFX11-NEXT: v_dual_mov_b32 v29, 9 :: v_dual_mov_b32 v28, 9 ; GFX11-NEXT: v_mov_b32_e32 v30, 10 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; HSA-LABEL: stack_12xv3i32: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: s_mov_b32 s4, s33 ; HSA-NEXT: s_mov_b32 s33, s32 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 11 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 12 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 14 ; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 15 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 1 ; HSA-NEXT: v_mov_b32_e32 v4, 1 ; HSA-NEXT: v_mov_b32_e32 v5, 1 ; HSA-NEXT: v_mov_b32_e32 v6, 2 ; HSA-NEXT: v_mov_b32_e32 v7, 2 ; HSA-NEXT: v_mov_b32_e32 v8, 2 ; HSA-NEXT: v_mov_b32_e32 v9, 3 ; HSA-NEXT: v_mov_b32_e32 v10, 3 ; HSA-NEXT: v_mov_b32_e32 v11, 3 ; HSA-NEXT: v_mov_b32_e32 v12, 4 ; HSA-NEXT: v_mov_b32_e32 v13, 4 ; HSA-NEXT: v_mov_b32_e32 v14, 4 ; HSA-NEXT: v_mov_b32_e32 v15, 5 ; HSA-NEXT: v_mov_b32_e32 v16, 5 ; HSA-NEXT: v_mov_b32_e32 v17, 5 ; HSA-NEXT: v_mov_b32_e32 v18, 6 ; HSA-NEXT: v_mov_b32_e32 v19, 6 ; HSA-NEXT: v_mov_b32_e32 v20, 6 ; HSA-NEXT: v_mov_b32_e32 v21, 7 ; HSA-NEXT: v_mov_b32_e32 v22, 7 ; HSA-NEXT: v_mov_b32_e32 v23, 7 ; HSA-NEXT: v_mov_b32_e32 v24, 8 ; HSA-NEXT: v_mov_b32_e32 v25, 8 ; HSA-NEXT: v_mov_b32_e32 v26, 8 ; 
HSA-NEXT: v_mov_b32_e32 v27, 9
; HSA-NEXT: v_mov_b32_e32 v28, 9
; HSA-NEXT: v_mov_b32_e32 v29, 9
; HSA-NEXT: v_mov_b32_e32 v30, 10
; HSA-NEXT: v_writelane_b32 v40, s31, 1
; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
; HSA-NEXT: v_readlane_b32 s31, v40, 1
; HSA-NEXT: v_readlane_b32 s30, v40, 0
; HSA-NEXT: s_mov_b32 s32, s33
; HSA-NEXT: v_readlane_b32 s4, v40, 2
; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; HSA-NEXT: s_mov_b64 exec, s[6:7]
; HSA-NEXT: s_mov_b32 s33, s4
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Twelve <3 x i32> arguments are 36 dwords, more than the v0-v30 range the
  ; expected code populates. Vectors 0-9 (splats of 0..9) and element 0 of
  ; vector 10 (value 10) are expected in v0-v30; the remaining values 11..15
  ; are expected on the stack at s32+0 .. s32+16.
  call void @external_void_func_12xv3i32(
      <3 x i32> <i32 0, i32 0, i32 0>,
      <3 x i32> <i32 1, i32 1, i32 1>,
      <3 x i32> <i32 2, i32 2, i32 2>,
      <3 x i32> <i32 3, i32 3, i32 3>,
      <3 x i32> <i32 4, i32 4, i32 4>,
      <3 x i32> <i32 5, i32 5, i32 5>,
      <3 x i32> <i32 6, i32 6, i32 6>,
      <3 x i32> <i32 7, i32 7, i32 7>,
      <3 x i32> <i32 8, i32 8, i32 8>,
      <3 x i32> <i32 9, i32 9, i32 9>,
      <3 x i32> <i32 10, i32 11, i32 12>,
      <3 x i32> <i32 13, i32 14, i32 15>)
  ret void
}

define void @stack_12xv3f32() #0 {
; VI-LABEL: stack_12xv3f32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, s33
; VI-NEXT: s_mov_b32 s33, s32
; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; VI-NEXT: s_mov_b64 exec, s[8:9]
; VI-NEXT: s_addk_i32 s32, 0x400
; VI-NEXT: v_mov_b32_e32 v0, 0x41300000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
; VI-NEXT: v_mov_b32_e32 v0, 0x41400000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
; VI-NEXT: v_mov_b32_e32 v0, 0x41500000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
; VI-NEXT: v_mov_b32_e32 v0, 0x41600000
; VI-NEXT: v_writelane_b32 v40, s4, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
; VI-NEXT: v_mov_b32_e32 v0, 0x41700000
; VI-NEXT: v_writelane_b32 v40, s30, 0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_mov_b32_e32
v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 1.0 ; VI-NEXT: v_mov_b32_e32 v4, 1.0 ; VI-NEXT: v_mov_b32_e32 v5, 1.0 ; VI-NEXT: v_mov_b32_e32 v6, 2.0 ; VI-NEXT: v_mov_b32_e32 v7, 2.0 ; VI-NEXT: v_mov_b32_e32 v8, 2.0 ; VI-NEXT: v_mov_b32_e32 v9, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v10, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v11, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v12, 4.0 ; VI-NEXT: v_mov_b32_e32 v13, 4.0 ; VI-NEXT: v_mov_b32_e32 v14, 4.0 ; VI-NEXT: v_mov_b32_e32 v15, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v16, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v17, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v18, 0x40c00000 ; VI-NEXT: v_mov_b32_e32 v19, 0x40c00000 ; VI-NEXT: v_mov_b32_e32 v20, 0x40c00000 ; VI-NEXT: v_mov_b32_e32 v21, 0x40e00000 ; VI-NEXT: v_mov_b32_e32 v22, 0x40e00000 ; VI-NEXT: v_mov_b32_e32 v23, 0x40e00000 ; VI-NEXT: v_mov_b32_e32 v24, 0x41000000 ; VI-NEXT: v_mov_b32_e32 v25, 0x41000000 ; VI-NEXT: v_mov_b32_e32 v26, 0x41000000 ; VI-NEXT: v_mov_b32_e32 v27, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v30, 0x41200000 ; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[6:7] ; VI-NEXT: s_mov_b32 s33, s4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; CI-LABEL: stack_12xv3f32: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: s_mov_b32 s4, s33 ; CI-NEXT: s_mov_b32 s33, s32 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: 
v_mov_b32_e32 v0, 0x41400000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 1.0 ; CI-NEXT: v_mov_b32_e32 v4, 1.0 ; CI-NEXT: v_mov_b32_e32 v5, 1.0 ; CI-NEXT: v_mov_b32_e32 v6, 2.0 ; CI-NEXT: v_mov_b32_e32 v7, 2.0 ; CI-NEXT: v_mov_b32_e32 v8, 2.0 ; CI-NEXT: v_mov_b32_e32 v9, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v10, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v11, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v12, 4.0 ; CI-NEXT: v_mov_b32_e32 v13, 4.0 ; CI-NEXT: v_mov_b32_e32 v14, 4.0 ; CI-NEXT: v_mov_b32_e32 v15, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v16, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v17, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v18, 0x40c00000 ; CI-NEXT: v_mov_b32_e32 v19, 0x40c00000 ; CI-NEXT: v_mov_b32_e32 v20, 0x40c00000 ; CI-NEXT: v_mov_b32_e32 v21, 0x40e00000 ; CI-NEXT: v_mov_b32_e32 v22, 0x40e00000 ; CI-NEXT: v_mov_b32_e32 v23, 0x40e00000 ; CI-NEXT: v_mov_b32_e32 v24, 0x41000000 ; CI-NEXT: v_mov_b32_e32 v25, 0x41000000 ; CI-NEXT: v_mov_b32_e32 v26, 0x41000000 ; CI-NEXT: v_mov_b32_e32 v27, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v30, 0x41200000 ; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 ; CI-NEXT: s_mov_b32 s32, s33 ; 
CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CI-NEXT: s_mov_b64 exec, s[6:7] ; CI-NEXT: s_mov_b32 s33, s4 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: stack_12xv3f32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v6, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v7, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v8, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v11, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v12, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v13, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v14, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40a00000 ; 
GFX9-NEXT: v_mov_b32_e32 v16, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v20, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v21, 0x40e00000 ; GFX9-NEXT: v_mov_b32_e32 v22, 0x40e00000 ; GFX9-NEXT: v_mov_b32_e32 v23, 0x40e00000 ; GFX9-NEXT: v_mov_b32_e32 v24, 0x41000000 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x41000000 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x41000000 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: stack_12xv3f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 
0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0 ; GFX11-NEXT: v_dual_mov_b32 v4, 1.0 :: v_dual_mov_b32 v7, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v6, 2.0 :: v_dual_mov_b32 v9, 0x40400000 ; GFX11-NEXT: v_dual_mov_b32 v8, 2.0 :: v_dual_mov_b32 v11, 0x40400000 ; GFX11-NEXT: v_dual_mov_b32 v10, 0x40400000 :: v_dual_mov_b32 v13, 4.0 ; GFX11-NEXT: v_dual_mov_b32 v12, 4.0 :: v_dual_mov_b32 v15, 0x40a00000 ; GFX11-NEXT: v_dual_mov_b32 v14, 4.0 :: v_dual_mov_b32 v17, 0x40a00000 ; GFX11-NEXT: v_mov_b32_e32 v16, 0x40a00000 ; GFX11-NEXT: v_dual_mov_b32 v18, 0x40c00000 :: v_dual_mov_b32 v19, 0x40c00000 ; GFX11-NEXT: v_mov_b32_e32 v20, 0x40c00000 ; GFX11-NEXT: v_dual_mov_b32 v21, 0x40e00000 :: v_dual_mov_b32 v22, 0x40e00000 ; GFX11-NEXT: v_mov_b32_e32 v23, 0x40e00000 ; GFX11-NEXT: v_dual_mov_b32 v24, 0x41000000 :: v_dual_mov_b32 v25, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v26, 0x41000000 ; GFX11-NEXT: v_dual_mov_b32 v27, 0x41100000 :: v_dual_mov_b32 v28, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v29, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x41200000 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; HSA-LABEL: stack_12xv3f32: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: s_mov_b32 s4, s33 ; HSA-NEXT: s_mov_b32 s33, s32 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 ; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 1.0 ; HSA-NEXT: v_mov_b32_e32 v4, 1.0 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0 ; HSA-NEXT: v_mov_b32_e32 v6, 2.0 ; HSA-NEXT: v_mov_b32_e32 v7, 2.0 ; HSA-NEXT: v_mov_b32_e32 v8, 2.0 ; HSA-NEXT: v_mov_b32_e32 v9, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v10, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v11, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v12, 4.0 ; HSA-NEXT: v_mov_b32_e32 v13, 4.0 ; HSA-NEXT: v_mov_b32_e32 v14, 4.0 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40c00000 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40c00000 ; HSA-NEXT: v_mov_b32_e32 v20, 0x40c00000 ; HSA-NEXT: v_mov_b32_e32 v21, 0x40e00000 ; HSA-NEXT: v_mov_b32_e32 v22, 0x40e00000 ; HSA-NEXT: v_mov_b32_e32 v23, 0x40e00000 ; HSA-NEXT: v_mov_b32_e32 v24, 0x41000000 ; HSA-NEXT: v_mov_b32_e32 v25, 0x41000000 ; HSA-NEXT: v_mov_b32_e32 v26, 0x41000000 ; HSA-NEXT: v_mov_b32_e32 v27, 0x41100000 ; HSA-NEXT: v_mov_b32_e32 v28, 
0x41100000
; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000
; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000
; HSA-NEXT: v_writelane_b32 v40, s31, 1
; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
; HSA-NEXT: v_readlane_b32 s31, v40, 1
; HSA-NEXT: v_readlane_b32 s30, v40, 0
; HSA-NEXT: s_mov_b32 s32, s33
; HSA-NEXT: v_readlane_b32 s4, v40, 2
; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; HSA-NEXT: s_mov_b64 exec, s[6:7]
; HSA-NEXT: s_mov_b32 s33, s4
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_setpc_b64 s[30:31]
; Twelve <3 x float> operands occupy 36 dwords. Each dword carries a distinct,
; recognisable constant so the assertions can tell lanes apart: the expected
; output places dwords 0-30 (0.0 .. 10.0) in v0-v30 and stores the last five
; (11.0 .. 15.0) to the outgoing stack area at offsets 0-16.
entry:
  call void @external_void_func_12xv3f32(
      <3 x float> zeroinitializer,
      <3 x float> <float 1.0, float 1.0, float 1.0>,
      <3 x float> <float 2.0, float 2.0, float 2.0>,
      <3 x float> <float 3.0, float 3.0, float 3.0>,
      <3 x float> <float 4.0, float 4.0, float 4.0>,
      <3 x float> <float 5.0, float 5.0, float 5.0>,
      <3 x float> <float 6.0, float 6.0, float 6.0>,
      <3 x float> <float 7.0, float 7.0, float 7.0>,
      <3 x float> <float 8.0, float 8.0, float 8.0>,
      <3 x float> <float 9.0, float 9.0, float 9.0>,
      <3 x float> <float 10.0, float 11.0, float 12.0>,
      <3 x float> <float 13.0, float 14.0, float 15.0>)
  ret void
}

; Calls a function taking eight <5 x i32> vectors (40 dwords in total).
define void @stack_8xv5i32() #0 {
; VI-LABEL: stack_8xv5i32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, s33
; VI-NEXT: s_mov_b32 s33, s32
; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; VI-NEXT: s_mov_b64 exec, s[8:9]
; VI-NEXT: s_addk_i32 s32, 0x400
; VI-NEXT: v_mov_b32_e32 v0, 7
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
; VI-NEXT: v_mov_b32_e32 v0, 8
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
; VI-NEXT: v_mov_b32_e32 v0, 10
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
; VI-NEXT: v_mov_b32_e32 v0, 11
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; VI-NEXT: v_mov_b32_e32 v0, 12
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
; VI-NEXT: v_mov_b32_e32 v0, 13
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
; VI-NEXT: v_mov_b32_e32 v0, 14
; VI-NEXT: v_writelane_b32 v40, s4, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28
; VI-NEXT: v_mov_b32_e32 v0, 15
; VI-NEXT: v_writelane_b32 v40, s30, 0
;
VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 0 ; VI-NEXT: v_mov_b32_e32 v4, 0 ; VI-NEXT: v_mov_b32_e32 v5, 1 ; VI-NEXT: v_mov_b32_e32 v6, 1 ; VI-NEXT: v_mov_b32_e32 v7, 1 ; VI-NEXT: v_mov_b32_e32 v8, 1 ; VI-NEXT: v_mov_b32_e32 v9, 1 ; VI-NEXT: v_mov_b32_e32 v10, 2 ; VI-NEXT: v_mov_b32_e32 v11, 2 ; VI-NEXT: v_mov_b32_e32 v12, 2 ; VI-NEXT: v_mov_b32_e32 v13, 2 ; VI-NEXT: v_mov_b32_e32 v14, 2 ; VI-NEXT: v_mov_b32_e32 v15, 3 ; VI-NEXT: v_mov_b32_e32 v16, 3 ; VI-NEXT: v_mov_b32_e32 v17, 3 ; VI-NEXT: v_mov_b32_e32 v18, 3 ; VI-NEXT: v_mov_b32_e32 v19, 3 ; VI-NEXT: v_mov_b32_e32 v20, 4 ; VI-NEXT: v_mov_b32_e32 v21, 4 ; VI-NEXT: v_mov_b32_e32 v22, 4 ; VI-NEXT: v_mov_b32_e32 v23, 4 ; VI-NEXT: v_mov_b32_e32 v24, 4 ; VI-NEXT: v_mov_b32_e32 v25, 5 ; VI-NEXT: v_mov_b32_e32 v26, 5 ; VI-NEXT: v_mov_b32_e32 v27, 5 ; VI-NEXT: v_mov_b32_e32 v28, 5 ; VI-NEXT: v_mov_b32_e32 v29, 5 ; VI-NEXT: v_mov_b32_e32 v30, 6 ; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[6:7] ; VI-NEXT: s_mov_b32 s33, s4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; CI-LABEL: stack_8xv5i32: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: s_mov_b32 s4, s33 ; CI-NEXT: s_mov_b32 s33, s32 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: 
s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 7 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, 9 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 10 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 11 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: v_mov_b32_e32 v0, 12 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 14 ; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 15 ; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 0 ; CI-NEXT: v_mov_b32_e32 v4, 0 ; CI-NEXT: v_mov_b32_e32 v5, 1 ; CI-NEXT: v_mov_b32_e32 v6, 1 ; CI-NEXT: v_mov_b32_e32 v7, 1 ; CI-NEXT: v_mov_b32_e32 v8, 1 ; CI-NEXT: v_mov_b32_e32 v9, 1 ; CI-NEXT: v_mov_b32_e32 v10, 2 ; CI-NEXT: v_mov_b32_e32 v11, 2 ; CI-NEXT: v_mov_b32_e32 v12, 2 ; CI-NEXT: v_mov_b32_e32 v13, 2 ; CI-NEXT: v_mov_b32_e32 v14, 2 ; CI-NEXT: v_mov_b32_e32 v15, 3 ; CI-NEXT: v_mov_b32_e32 v16, 3 ; CI-NEXT: v_mov_b32_e32 v17, 3 ; CI-NEXT: v_mov_b32_e32 v18, 3 ; CI-NEXT: v_mov_b32_e32 v19, 3 ; CI-NEXT: v_mov_b32_e32 v20, 4 ; CI-NEXT: v_mov_b32_e32 v21, 4 ; CI-NEXT: v_mov_b32_e32 v22, 4 ; CI-NEXT: v_mov_b32_e32 v23, 4 ; CI-NEXT: v_mov_b32_e32 v24, 4 ; CI-NEXT: v_mov_b32_e32 v25, 5 ; CI-NEXT: v_mov_b32_e32 v26, 5 ; CI-NEXT: v_mov_b32_e32 v27, 5 ; CI-NEXT: v_mov_b32_e32 
v28, 5 ; CI-NEXT: v_mov_b32_e32 v29, 5 ; CI-NEXT: v_mov_b32_e32 v30, 6 ; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CI-NEXT: s_mov_b64 exec, s[6:7] ; CI-NEXT: s_mov_b32 s33, s4 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: stack_8xv5i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 7 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 9 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 10 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 11 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, 
external_void_func_8xv5i32@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 1 ; GFX9-NEXT: v_mov_b32_e32 v6, 1 ; GFX9-NEXT: v_mov_b32_e32 v7, 1 ; GFX9-NEXT: v_mov_b32_e32 v8, 1 ; GFX9-NEXT: v_mov_b32_e32 v9, 1 ; GFX9-NEXT: v_mov_b32_e32 v10, 2 ; GFX9-NEXT: v_mov_b32_e32 v11, 2 ; GFX9-NEXT: v_mov_b32_e32 v12, 2 ; GFX9-NEXT: v_mov_b32_e32 v13, 2 ; GFX9-NEXT: v_mov_b32_e32 v14, 2 ; GFX9-NEXT: v_mov_b32_e32 v15, 3 ; GFX9-NEXT: v_mov_b32_e32 v16, 3 ; GFX9-NEXT: v_mov_b32_e32 v17, 3 ; GFX9-NEXT: v_mov_b32_e32 v18, 3 ; GFX9-NEXT: v_mov_b32_e32 v19, 3 ; GFX9-NEXT: v_mov_b32_e32 v20, 4 ; GFX9-NEXT: v_mov_b32_e32 v21, 4 ; GFX9-NEXT: v_mov_b32_e32 v22, 4 ; GFX9-NEXT: v_mov_b32_e32 v23, 4 ; GFX9-NEXT: v_mov_b32_e32 v24, 4 ; GFX9-NEXT: v_mov_b32_e32 v25, 5 ; GFX9-NEXT: v_mov_b32_e32 v26, 5 ; GFX9-NEXT: v_mov_b32_e32 v27, 5 ; GFX9-NEXT: v_mov_b32_e32 v28, 5 ; GFX9-NEXT: v_mov_b32_e32 v29, 5 ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: stack_8xv5i32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8 ; 
GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10 ; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14 ; GFX11-NEXT: v_mov_b32_e32 v6, 13 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: scratch_store_b32 off, v8, s0 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 1 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 1 ; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v9, 1 ; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v11, 2 ; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v13, 2 ; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v15, 3 ; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v17, 3 ; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v19, 3 ; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v21, 4 ; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v23, 4 ; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v25, 5 ; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v27, 5 ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v29, 5 ; GFX11-NEXT: v_mov_b32_e32 v28, 5 ; GFX11-NEXT: v_mov_b32_e32 v30, 6 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, 
s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; HSA-LABEL: stack_8xv5i32: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: s_mov_b32 s4, s33 ; HSA-NEXT: s_mov_b32 s33, s32 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 7 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 8 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: v_mov_b32_e32 v0, 9 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 10 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 11 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: v_mov_b32_e32 v0, 12 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 14 ; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 15 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 0 ; HSA-NEXT: v_mov_b32_e32 v4, 0 ; HSA-NEXT: v_mov_b32_e32 v5, 1 ; HSA-NEXT: v_mov_b32_e32 v6, 1 ; HSA-NEXT: v_mov_b32_e32 v7, 1 ; HSA-NEXT: v_mov_b32_e32 v8, 1 ; HSA-NEXT: v_mov_b32_e32 v9, 1 ; HSA-NEXT: v_mov_b32_e32 v10, 2 ; HSA-NEXT: v_mov_b32_e32 v11, 2 ; 
HSA-NEXT: v_mov_b32_e32 v12, 2
; HSA-NEXT: v_mov_b32_e32 v13, 2
; HSA-NEXT: v_mov_b32_e32 v14, 2
; HSA-NEXT: v_mov_b32_e32 v15, 3
; HSA-NEXT: v_mov_b32_e32 v16, 3
; HSA-NEXT: v_mov_b32_e32 v17, 3
; HSA-NEXT: v_mov_b32_e32 v18, 3
; HSA-NEXT: v_mov_b32_e32 v19, 3
; HSA-NEXT: v_mov_b32_e32 v20, 4
; HSA-NEXT: v_mov_b32_e32 v21, 4
; HSA-NEXT: v_mov_b32_e32 v22, 4
; HSA-NEXT: v_mov_b32_e32 v23, 4
; HSA-NEXT: v_mov_b32_e32 v24, 4
; HSA-NEXT: v_mov_b32_e32 v25, 5
; HSA-NEXT: v_mov_b32_e32 v26, 5
; HSA-NEXT: v_mov_b32_e32 v27, 5
; HSA-NEXT: v_mov_b32_e32 v28, 5
; HSA-NEXT: v_mov_b32_e32 v29, 5
; HSA-NEXT: v_mov_b32_e32 v30, 6
; HSA-NEXT: v_writelane_b32 v40, s31, 1
; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
; HSA-NEXT: v_readlane_b32 s31, v40, 1
; HSA-NEXT: v_readlane_b32 s30, v40, 0
; HSA-NEXT: s_mov_b32 s32, s33
; HSA-NEXT: v_readlane_b32 s4, v40, 2
; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; HSA-NEXT: s_mov_b64 exec, s[6:7]
; HSA-NEXT: s_mov_b32 s33, s4
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_setpc_b64 s[30:31]
; Eight <5 x i32> operands occupy 40 dwords. Each dword carries a distinct,
; recognisable constant so the assertions can tell lanes apart: the expected
; output places dwords 0-30 (0 .. 6) in v0-v30 and stores the last nine
; (7 .. 15) to the outgoing stack area at offsets 0-32.
entry:
  call void @external_void_func_8xv5i32(
      <5 x i32> zeroinitializer,
      <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1>,
      <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2>,
      <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3>,
      <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>,
      <5 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5>,
      <5 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10>,
      <5 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15>)
  ret void
}

; Calls a function taking eight <5 x float> vectors (40 dwords in total).
define void @stack_8xv5f32() #0 {
; VI-LABEL: stack_8xv5f32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, s33
; VI-NEXT: s_mov_b32 s33, s32
; VI-NEXT: s_or_saveexec_b64 s[8:9], -1
; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; VI-NEXT: s_mov_b64 exec, s[8:9]
; VI-NEXT: s_addk_i32 s32, 0x400
; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32
; VI-NEXT: v_mov_b32_e32 v0, 0x41000000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4
; VI-NEXT: v_mov_b32_e32 v0, 0x41100000
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
; VI-NEXT: buffer_store_dword v0, off, s[0:3],
s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_mov_b32_e32 v2, 0 ; VI-NEXT: v_mov_b32_e32 v3, 0 ; VI-NEXT: v_mov_b32_e32 v4, 0 ; VI-NEXT: v_mov_b32_e32 v5, 1.0 ; VI-NEXT: v_mov_b32_e32 v6, 1.0 ; VI-NEXT: v_mov_b32_e32 v7, 1.0 ; VI-NEXT: v_mov_b32_e32 v8, 1.0 ; VI-NEXT: v_mov_b32_e32 v9, 1.0 ; VI-NEXT: v_mov_b32_e32 v10, 2.0 ; VI-NEXT: v_mov_b32_e32 v11, 2.0 ; VI-NEXT: v_mov_b32_e32 v12, 2.0 ; VI-NEXT: v_mov_b32_e32 v13, 2.0 ; VI-NEXT: v_mov_b32_e32 v14, 2.0 ; VI-NEXT: v_mov_b32_e32 v15, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v16, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v17, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v18, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v19, 0x40400000 ; VI-NEXT: v_mov_b32_e32 v20, 4.0 ; VI-NEXT: v_mov_b32_e32 v21, 4.0 ; VI-NEXT: v_mov_b32_e32 v22, 4.0 ; VI-NEXT: v_mov_b32_e32 v23, 4.0 ; VI-NEXT: v_mov_b32_e32 v24, 4.0 ; VI-NEXT: v_mov_b32_e32 v25, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v26, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v27, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: v_readlane_b32 s31, v40, 1 ; 
VI-NEXT: v_readlane_b32 s30, v40, 0 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[6:7] ; VI-NEXT: s_mov_b32 s33, s4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; CI-LABEL: stack_8xv5f32: ; CI: ; %bb.0: ; %entry ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: s_mov_b32 s4, s33 ; CI-NEXT: s_mov_b32 s33, s32 ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] ; CI-NEXT: s_addk_i32 s32, 0x400 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 0x41000000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, 0x41100000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 0x41200000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 ; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 ; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v3, 0 ; CI-NEXT: v_mov_b32_e32 v4, 0 ; CI-NEXT: 
v_mov_b32_e32 v5, 1.0 ; CI-NEXT: v_mov_b32_e32 v6, 1.0 ; CI-NEXT: v_mov_b32_e32 v7, 1.0 ; CI-NEXT: v_mov_b32_e32 v8, 1.0 ; CI-NEXT: v_mov_b32_e32 v9, 1.0 ; CI-NEXT: v_mov_b32_e32 v10, 2.0 ; CI-NEXT: v_mov_b32_e32 v11, 2.0 ; CI-NEXT: v_mov_b32_e32 v12, 2.0 ; CI-NEXT: v_mov_b32_e32 v13, 2.0 ; CI-NEXT: v_mov_b32_e32 v14, 2.0 ; CI-NEXT: v_mov_b32_e32 v15, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v16, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v17, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v18, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v19, 0x40400000 ; CI-NEXT: v_mov_b32_e32 v20, 4.0 ; CI-NEXT: v_mov_b32_e32 v21, 4.0 ; CI-NEXT: v_mov_b32_e32 v22, 4.0 ; CI-NEXT: v_mov_b32_e32 v23, 4.0 ; CI-NEXT: v_mov_b32_e32 v24, 4.0 ; CI-NEXT: v_mov_b32_e32 v25, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v26, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v27, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CI-NEXT: s_mov_b64 exec, s[6:7] ; CI-NEXT: s_mov_b32 s33, s4 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: stack_8xv5f32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 
0x41100000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 ; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v6, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v7, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v8, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v9, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v10, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v11, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v12, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v13, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v14, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000 ; GFX9-NEXT: v_mov_b32_e32 v20, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v21, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v22, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v23, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v24, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000 
; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: stack_8xv5f32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000 ; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000 ; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v8, s0 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0 ; GFX11-NEXT: v_dual_mov_b32 v6, 1.0 :: v_dual_mov_b32 v7, 1.0 ; GFX11-NEXT: v_dual_mov_b32 v8, 1.0 :: v_dual_mov_b32 v11, 2.0 ; 
GFX11-NEXT: v_dual_mov_b32 v10, 2.0 :: v_dual_mov_b32 v13, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v12, 2.0 :: v_dual_mov_b32 v15, 0x40400000 ; GFX11-NEXT: v_dual_mov_b32 v14, 2.0 :: v_dual_mov_b32 v17, 0x40400000 ; GFX11-NEXT: v_dual_mov_b32 v16, 0x40400000 :: v_dual_mov_b32 v19, 0x40400000 ; GFX11-NEXT: v_dual_mov_b32 v18, 0x40400000 :: v_dual_mov_b32 v21, 4.0 ; GFX11-NEXT: v_dual_mov_b32 v20, 4.0 :: v_dual_mov_b32 v23, 4.0 ; GFX11-NEXT: v_dual_mov_b32 v22, 4.0 :: v_dual_mov_b32 v25, 0x40a00000 ; GFX11-NEXT: v_dual_mov_b32 v24, 4.0 :: v_dual_mov_b32 v27, 0x40a00000 ; GFX11-NEXT: v_dual_mov_b32 v26, 0x40a00000 :: v_dual_mov_b32 v29, 0x40a00000 ; GFX11-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; HSA-LABEL: stack_8xv5f32: ; HSA: ; %bb.0: ; %entry ; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; HSA-NEXT: s_mov_b32 s4, s33 ; HSA-NEXT: s_mov_b32 s33, s32 ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] ; HSA-NEXT: s_addk_i32 s32, 0x400 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; HSA-NEXT: 
v_mov_b32_e32 v0, 0x41100000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41200000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 ; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 ; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 ; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 ; HSA-NEXT: v_mov_b32_e32 v0, 0 ; HSA-NEXT: v_mov_b32_e32 v1, 0 ; HSA-NEXT: v_mov_b32_e32 v2, 0 ; HSA-NEXT: v_mov_b32_e32 v3, 0 ; HSA-NEXT: v_mov_b32_e32 v4, 0 ; HSA-NEXT: v_mov_b32_e32 v5, 1.0 ; HSA-NEXT: v_mov_b32_e32 v6, 1.0 ; HSA-NEXT: v_mov_b32_e32 v7, 1.0 ; HSA-NEXT: v_mov_b32_e32 v8, 1.0 ; HSA-NEXT: v_mov_b32_e32 v9, 1.0 ; HSA-NEXT: v_mov_b32_e32 v10, 2.0 ; HSA-NEXT: v_mov_b32_e32 v11, 2.0 ; HSA-NEXT: v_mov_b32_e32 v12, 2.0 ; HSA-NEXT: v_mov_b32_e32 v13, 2.0 ; HSA-NEXT: v_mov_b32_e32 v14, 2.0 ; HSA-NEXT: v_mov_b32_e32 v15, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v16, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v17, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v18, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v19, 0x40400000 ; HSA-NEXT: v_mov_b32_e32 v20, 4.0 ; HSA-NEXT: v_mov_b32_e32 v21, 4.0 ; HSA-NEXT: v_mov_b32_e32 v22, 4.0 ; HSA-NEXT: v_mov_b32_e32 v23, 4.0 ; HSA-NEXT: v_mov_b32_e32 v24, 4.0 ; HSA-NEXT: v_mov_b32_e32 v25, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v26, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v27, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 
v28, 0x40a00000
; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000
; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000
; HSA-NEXT: v_writelane_b32 v40, s31, 1
; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5]
; HSA-NEXT: v_readlane_b32 s31, v40, 1
; HSA-NEXT: v_readlane_b32 s30, v40, 0
; HSA-NEXT: s_mov_b32 s32, s33
; HSA-NEXT: v_readlane_b32 s4, v40, 2
; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1
; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; HSA-NEXT: s_mov_b64 exec, s[6:7]
; HSA-NEXT: s_mov_b32 s33, s4
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_setpc_b64 s[30:31]
; Eight <5 x float> operands occupy 40 dwords. Each dword carries a distinct,
; recognisable constant so the assertions can tell lanes apart: the expected
; output places dwords 0-30 (0.0 .. 6.0) in v0-v30 and stores the last nine
; (7.0 .. 15.0) to the outgoing stack area at offsets 0-32.
entry:
  call void @external_void_func_8xv5f32(
      <5 x float> zeroinitializer,
      <5 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
      <5 x float> <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
      <5 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
      <5 x float> <float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
      <5 x float> <float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
      <5 x float> <float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
      <5 x float> <float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
  ret void
}

; External callees used by the stack-passing tests; only their signatures
; matter, so they are declared without bodies.
declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>, <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0

attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }