summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2024-07-15 09:59:07 +0400
committer Matt Arsenault <arsenm2@gmail.com> 2024-07-15 11:51:44 +0400
commit b1bcb7ca460fcd317bbc8309e14c8761bf8394e0 (patch)
tree cf2636217534435b2de9783a7cf8e9325819e658 /llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
parent 71051deff27928cff908ea794e09806eee662801 (diff)
Reapply "AMDGPU: Move attributor into optimization pipeline (#83131)" and follow up commit "clang/AMDGPU: Defeat attribute optimization in attribute test" (#98851)
This reverts commit adaff46d087799072438dd744b038e6fd50a2d78. Drop the -O3 checks from default-attributes.hip. I don't know why they are different on some bots but reverting this is far too disruptive.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll 80
1 files changed, 46 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 7b9b130e1cf7..41a9d7999e80 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -36,8 +36,9 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
;
; GFX11-LABEL: write_ds_sub0_offset0_global:
; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b32 v0, v1 offset:12
; GFX11-NEXT: s_endpgm
@@ -53,7 +54,7 @@ entry:
define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 {
; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; CI: ; %bb.0: ; %entry
-; CI-NEXT: s_load_dword s0, s[0:1], 0x0
+; CI-NEXT: s_load_dword s0, s[2:3], 0x0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT: s_mov_b64 vcc, 0
@@ -73,7 +74,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
;
; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v0
@@ -90,7 +91,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
;
; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b
@@ -106,10 +107,11 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
;
; GFX11-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
@@ -135,7 +137,7 @@ entry:
define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy.val) #0 {
; CI-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dword s0, s[0:1], 0x0
+; CI-NEXT: s_load_dword s0, s[2:3], 0x0
; CI-NEXT: s_mov_b64 vcc, 0
; CI-NEXT: v_mov_b32_e32 v1, 0x7b
; CI-NEXT: v_mov_b32_e32 v2, 0
@@ -154,7 +156,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
;
; GFX9-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -170,7 +172,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
;
; GFX10-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0x7b
@@ -185,7 +187,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
;
; GFX11-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v2, 0x7b :: v_dual_mov_b32 v3, 0
@@ -233,7 +235,9 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
;
; GFX11-LABEL: add_x_shl_max_offset:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 4, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535
; GFX11-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -275,8 +279,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
@@ -319,8 +324,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
@@ -361,8 +367,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
@@ -407,7 +414,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: ds_store_b32 v0, v1 offset:456
@@ -455,8 +463,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
@@ -503,9 +512,10 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x7b
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fb, v0
; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1
; GFX11-NEXT: s_endpgm
@@ -521,7 +531,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; CI: ; %bb.0:
-; CI-NEXT: s_load_dword s0, s[0:1], 0x0
+; CI-NEXT: s_load_dword s0, s[2:3], 0x0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_sub_i32_e32 v0, vcc, 0x3fb, v0
; CI-NEXT: s_mov_b64 vcc, 0
@@ -542,7 +552,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX9-NEXT: s_mov_b64 vcc, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v3, 0x3fb, v0
@@ -560,7 +570,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 vcc_lo, 0
; GFX10-NEXT: v_mov_b32_e32 v3, 0
@@ -578,11 +588,12 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
;
; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-NEXT: v_dual_mov_b32 v3, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
-; GFX11-NEXT: v_mov_b32_e32 v4, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v3, 0x7b
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0x3fb, v0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
@@ -637,9 +648,10 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #
;
; GFX11-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x7b
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fc, v0
; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1
; GFX11-NEXT: s_endpgm