summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/dagcombine-select.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/dagcombine-select.ll163
1 files changed, 152 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index c429b1a32bde..39365fe7b6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -579,18 +579,159 @@ define amdgpu_kernel void @fdiv_constant_sel_constants(ptr addrspace(1) %p, i1 %
ret void
}
+; ExpandFp now expands frem before it reaches dagcombine.
+; TODO Implement this optimization in/before ExpandFP
define amdgpu_kernel void @frem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
-; GCN-LABEL: frem_constant_sel_constants:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_bitcmp1_b32 s2, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GCN-NEXT: v_cndmask_b32_e64 v1, 2.0, 1.0, s[2:3]
-; GCN-NEXT: global_store_dword v0, v1, s[0:1]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: frem_constant_sel_constants:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s0, s[4:5], 0x2c
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_bitcmp1_b32 s0, 0
+; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v0, -4.0, s[0:1]
+; GFX9-NEXT: s_mov_b32 s0, 0x40a00000
+; GFX9-NEXT: v_cmp_nlt_f32_e64 s[2:3], |v1|, s0
+; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
+; GFX9-NEXT: s_cbranch_vccz .LBB26_2
+; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x40a00000
+; GFX9-NEXT: v_cmp_eq_f32_e64 s[0:1], |v1|, s0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
+; GFX9-NEXT: s_cbranch_execz .LBB26_3
+; GFX9-NEXT: s_branch .LBB26_7
+; GFX9-NEXT: .LBB26_2:
+; GFX9-NEXT: ; implicit-def: $vgpr0
+; GFX9-NEXT: .LBB26_3: ; %frem.compute
+; GFX9-NEXT: v_frexp_mant_f32_e64 v0, |v1|
+; GFX9-NEXT: v_ldexp_f32 v0, v0, 1
+; GFX9-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
+; GFX9-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
+; GFX9-NEXT: v_rcp_f32_e32 v4, v2
+; GFX9-NEXT: v_fma_f32 v5, -v2, v4, 1.0
+; GFX9-NEXT: v_fma_f32 v4, v5, v4, v4
+; GFX9-NEXT: v_mul_f32_e32 v5, v3, v4
+; GFX9-NEXT: v_fma_f32 v6, -v2, v5, v3
+; GFX9-NEXT: v_fma_f32 v5, v6, v4, v5
+; GFX9-NEXT: v_fma_f32 v2, -v2, v5, v3
+; GFX9-NEXT: v_div_fmas_f32 v3, v2, v4, v5
+; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v2, v1
+; GFX9-NEXT: v_sub_u32_e32 v1, 3, v2
+; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v1
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x45200000
+; GFX9-NEXT: v_div_fixup_f32 v3, v3, v0, 1.0
+; GFX9-NEXT: s_cbranch_vccnz .LBB26_6
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: v_sub_u32_e32 v1, 15, v2
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x45200000
+; GFX9-NEXT: .LBB26_5: ; %frem.loop_body
+; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX9-NEXT: v_rndne_f32_e32 v5, v5
+; GFX9-NEXT: v_fma_f32 v5, -v5, v0, v4
+; GFX9-NEXT: v_add_f32_e32 v6, v5, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v5
+; GFX9-NEXT: v_add_u32_e32 v1, -12, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 12, v1
+; GFX9-NEXT: v_ldexp_f32 v5, v5, 12
+; GFX9-NEXT: s_cbranch_vccnz .LBB26_5
+; GFX9-NEXT: .LBB26_6: ; %Flow12
+; GFX9-NEXT: v_add_u32_e32 v1, -11, v1
+; GFX9-NEXT: v_ldexp_f32 v1, v4, v1
+; GFX9-NEXT: v_mul_f32_e32 v3, v1, v3
+; GFX9-NEXT: v_rndne_f32_e32 v3, v3
+; GFX9-NEXT: v_fma_f32 v1, -v3, v0, v1
+; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
+; GFX9-NEXT: v_add_u32_e32 v2, -1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX9-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX9-NEXT: .LBB26_7: ; %Flow14
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX942-LABEL: frem_constant_sel_constants:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s0, s[4:5], 0x2c
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_bitcmp1_b32 s0, 0
+; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
+; GFX942-NEXT: v_cndmask_b32_e64 v1, v0, -4.0, s[0:1]
+; GFX942-NEXT: s_mov_b32 s0, 0x40a00000
+; GFX942-NEXT: v_cmp_nlt_f32_e64 s[2:3], |v1|, s0
+; GFX942-NEXT: s_and_b64 vcc, exec, s[2:3]
+; GFX942-NEXT: s_cbranch_vccz .LBB26_2
+; GFX942-NEXT: ; %bb.1: ; %frem.else
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x40a00000
+; GFX942-NEXT: v_cmp_eq_f32_e64 s[0:1], |v1|, s0
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
+; GFX942-NEXT: s_cbranch_execz .LBB26_3
+; GFX942-NEXT: s_branch .LBB26_7
+; GFX942-NEXT: .LBB26_2:
+; GFX942-NEXT: ; implicit-def: $vgpr0
+; GFX942-NEXT: .LBB26_3: ; %frem.compute
+; GFX942-NEXT: v_frexp_mant_f32_e64 v0, |v1|
+; GFX942-NEXT: v_ldexp_f32 v0, v0, 1
+; GFX942-NEXT: v_div_scale_f32 v3, s[0:1], v0, v0, 1.0
+; GFX942-NEXT: v_rcp_f32_e32 v4, v3
+; GFX942-NEXT: v_frexp_exp_i32_f32_e32 v2, v1
+; GFX942-NEXT: v_sub_u32_e32 v1, 3, v2
+; GFX942-NEXT: v_fma_f32 v5, -v3, v4, 1.0
+; GFX942-NEXT: v_fmac_f32_e32 v4, v5, v4
+; GFX942-NEXT: v_div_scale_f32 v5, vcc, 1.0, v0, 1.0
+; GFX942-NEXT: v_mul_f32_e32 v6, v5, v4
+; GFX942-NEXT: v_fma_f32 v7, -v3, v6, v5
+; GFX942-NEXT: v_fmac_f32_e32 v6, v7, v4
+; GFX942-NEXT: v_fma_f32 v3, -v3, v6, v5
+; GFX942-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX942-NEXT: v_cmp_gt_i32_e32 vcc, 13, v1
+; GFX942-NEXT: v_div_fixup_f32 v3, v3, v0, 1.0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0x45200000
+; GFX942-NEXT: s_cbranch_vccnz .LBB26_6
+; GFX942-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX942-NEXT: v_sub_u32_e32 v1, 15, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, 0x45200000
+; GFX942-NEXT: .LBB26_5: ; %frem.loop_body
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX942-NEXT: v_rndne_f32_e32 v5, v5
+; GFX942-NEXT: v_fma_f32 v5, -v5, v0, v4
+; GFX942-NEXT: v_add_f32_e32 v6, v5, v0
+; GFX942-NEXT: v_cmp_gt_f32_e32 vcc, 0, v5
+; GFX942-NEXT: v_add_u32_e32 v1, -12, v1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc
+; GFX942-NEXT: v_cmp_lt_i32_e32 vcc, 12, v1
+; GFX942-NEXT: v_ldexp_f32 v5, v5, 12
+; GFX942-NEXT: s_cbranch_vccnz .LBB26_5
+; GFX942-NEXT: .LBB26_6: ; %Flow12
+; GFX942-NEXT: v_add_u32_e32 v1, -11, v1
+; GFX942-NEXT: v_ldexp_f32 v1, v4, v1
+; GFX942-NEXT: v_mul_f32_e32 v3, v1, v3
+; GFX942-NEXT: v_rndne_f32_e32 v3, v3
+; GFX942-NEXT: v_fma_f32 v1, -v3, v0, v1
+; GFX942-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX942-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
+; GFX942-NEXT: v_add_u32_e32 v2, -1, v2
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX942-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX942-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX942-NEXT: .LBB26_7: ; %Flow14
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX942-NEXT: s_endpgm
%sel = select i1 %cond, float -4.0, float 3.0
%bo = frem float 5.0, %sel
store float %bo, ptr addrspace(1) %p, align 4