diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index b0f2aac9a42d..7cafa2f608a4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -3990,6 +3990,116 @@ bb: ret void } +define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) { +; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: v_add_u32_e32 v0, s3, v0 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0 +; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX940: ; %bb.0: ; %bb +; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 +; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 +; GFX940-NEXT: v_mov_b32_e32 v1, 15 +; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX12: ; %bb.0: ; %bb +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm +; +; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX9: ; %bb.0: ; %bb +; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0 +; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: s_endpgm +; +; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX10: ; %bb.0: ; %bb +; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5 +; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0 +; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16 +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: s_endpgm +; +; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX940: ; %bb.0: ; %bb +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0 +; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: s_endpgm +; +; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX11: ; %bb.0: ; %bb +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: s_endpgm +; +; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX12: ; %bb.0: ; %bb +; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: s_endpgm +bb: + %add1 = add nsw i32 %sidx, %vidx + %add2 = add nsw i32 %add1, -16 + %gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2 + store volatile i32 15, ptr addrspace(5) %gep, align 4 + ret void +} + define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) { ; GFX9-LABEL: sgpr_base_negative_offset: ; GFX9: ; %bb.0: ; %entry |
