diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll | 80 |
1 files changed, 46 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll index 7b9b130e1cf7..41a9d7999e80 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -36,8 +36,9 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 { ; ; GFX11-LABEL: write_ds_sub0_offset0_global: ; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 ; GFX11-NEXT: ds_store_b32 v0, v1 offset:12 ; GFX11-NEXT: s_endpgm @@ -53,7 +54,7 @@ entry: define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 { ; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; CI: ; %bb.0: ; %entry -; CI-NEXT: s_load_dword s0, s[0:1], 0x0 +; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 ; CI-NEXT: s_mov_b64 vcc, 0 @@ -73,7 +74,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0, v0 @@ -90,7 +91,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b @@ -106,10 +107,11 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX11-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 @@ -135,7 +137,7 @@ entry: define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy.val) #0 { ; CI-LABEL: write_ds_sub_max_offset_global_clamp_bit: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s0, s[0:1], 0x0 +; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: s_mov_b64 vcc, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0x7b ; CI-NEXT: v_mov_b32_e32 v2, 0 @@ -154,7 +156,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy ; ; GFX9-LABEL: write_ds_sub_max_offset_global_clamp_bit: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v4, 0 @@ -170,7 +172,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy ; ; GFX10-LABEL: write_ds_sub_max_offset_global_clamp_bit: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b @@ -185,7 +187,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy ; ; GFX11-LABEL: write_ds_sub_max_offset_global_clamp_bit: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 +; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0x7b :: v_dual_mov_b32 v3, 0 @@ -233,7 +235,9 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 { ; ; GFX11-LABEL: add_x_shl_max_offset: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 4, v0 +; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535 ; GFX11-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -275,8 +279,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 { ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm @@ -319,8 +324,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 { ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm @@ -361,8 +367,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 { ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0 ; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm @@ -407,7 +414,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 { ; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 ; GFX11-NEXT: ds_store_b32 v0, v1 offset:123 ; GFX11-NEXT: ds_store_b32 v0, v1 offset:456 @@ -455,8 +463,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 { ; ; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 ; GFX11-NEXT: ds_store_b32 v0, v1 offset:123 ; GFX11-NEXT: ds_store_b32 v0, v1 offset:123 @@ -503,9 +512,10 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { ; ; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x7b +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fb, v0 ; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1 ; GFX11-NEXT: s_endpgm @@ -521,7 +531,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 { ; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s0, s[0:1], 0x0 +; CI-NEXT: s_load_dword s0, s[2:3], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0x3fb, v0 ; CI-NEXT: s_mov_b64 vcc, 0 @@ -542,7 +552,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0x3fb, v0 @@ -560,7 +570,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 @@ -578,11 +588,12 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, 0x7b +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0x3fb, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 @@ -637,9 +648,10 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() # ; ; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x7b +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fc, v0 ; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1 ; GFX11-NEXT: s_endpgm |
