diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fmaximum3.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 600 |
1 files changed, 120 insertions, 480 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 3ec36f03a48a..9ce1ba3316dd 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -497,47 +497,19 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX9-LABEL: v_fmaximum3_v2f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v2, v4, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v5, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v2, v5, v1 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0) @@ -559,47 +531,19 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX9-LABEL: v_fmaximum3_v2f32_commute: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v2, v0, v4 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v2, v1, v5 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) @@ -621,47 +565,19 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v3| -; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc +; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3| ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v3| -; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, |v6|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, |v1|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v3|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v2| -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v2| -; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, |v3|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, |v0|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v2|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v4| -; GFX9-NEXT: v_cndmask_b32_e64 v2, |v4|, v0, s[4:5] +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2| +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4| ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v4|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v5| -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, |v5|, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5| ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v5|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b) @@ -686,47 +602,19 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc +; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v3 -; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, -v6, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, -v1, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v3, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v2 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, -v3, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -v0, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v2, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, -v4, v0, s[4:5] +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2 +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4 ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v4, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v5 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, -v5, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5 ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v5, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <2 x float> %a %b.fneg = fneg <2 x float> %b @@ -751,35 +639,19 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v4, 2.0, v0, vcc +; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v0, vcc +; GFX9-NEXT: v_max_f32_e32 v4, v0, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-NEXT: v_max_f32_e32 v2, v1, v3 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) @@ -801,33 +673,17 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v4, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 4.0, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -852,67 +708,25 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX9-LABEL: v_fmaximum3_v3f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v6, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v6, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v7, v1 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v7, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v8, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v8, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v8, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0) @@ -935,67 +749,25 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX9-LABEL: v_fmaximum3_v3f32_commute: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v0, v6 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v6, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v1, v7 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v7, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v8 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v2, v8 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v8, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) @@ -1018,67 +790,25 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v2|, |v5| -; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc +; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5| ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v2|, |v5| -; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, |v9|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v2|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, |v2|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v5|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v5|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, |v4| -; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v1|, |v4| -; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, |v5|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v1|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, |v1|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v4|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v4|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, |v3| -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], |v0|, |v3| -; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, |v4|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, |v0|, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v3|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v3|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, |v6| -; GFX9-NEXT: v_cndmask_b32_e64 v3, |v6|, v0, s[4:5] +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4| +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3| +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6| ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v6|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, |v6|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, |v7| -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, |v7|, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7| ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v7|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, |v7|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, |v8| -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, |v8|, v2, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8| ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], |v8|, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, |v8|, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) @@ -1104,67 +834,25 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v2, -v5 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v2, vcc +; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5 ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v2, -v5 -; GFX9-NEXT: v_cndmask_b32_e64 v9, v10, -v9, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v2, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, -v2, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v5, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v5, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v1, -v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v1, -v4 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v10, -v5, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v1, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, -v1, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v4, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v4, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 vcc, -v0, -v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 s[4:5], -v0, -v3 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, -v4, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, -v0, s[4:5] -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v3, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v3, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v0, -v6 -; GFX9-NEXT: v_cndmask_b32_e64 v3, -v6, v0, s[4:5] +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4 +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3 +; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6 ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v6, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -v6, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v1, -v7 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, -v7, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7 ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v7, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v7, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], v2, -v8 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, -v8, v2, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8 ; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 s[4:5], -v8, 64 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, -v8, s[4:5] -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <3 x float> %a %b.fneg = fneg <3 x float> %b @@ -1190,49 +878,25 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v6, 2.0, v0, vcc +; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v3, v0, vcc +; GFX9-NEXT: v_max_f32_e32 v6, v0, v3 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v1, v4 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v3, v2, v5 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) @@ -1255,47 +919,23 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v5, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v6, v2, v5 ; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v2, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v5, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v1, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v4, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v3, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v0, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_class_f32_e64 vcc, v3, 64 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 4.0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 4.0, v2, vcc +; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2 ; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] |
