diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/minmax.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/minmax.ll | 44 |
1 files changed, 26 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll index 57e69430f5a8..d578d2e9720f 100644 --- a/llvm/test/CodeGen/AMDGPU/minmax.ll +++ b/llvm/test/CodeGen/AMDGPU/minmax.ll @@ -612,10 +612,10 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) ; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4 ; GFX1250-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-NEXT: s_set_pc_i64 s[30:31] - %tmp0 = call float @llvm.minnum.f32(float %x, float %y) - %tmp1 = call float @llvm.maxnum.f32(float %x, float %y) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z) - %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %x, float %y) + %tmp1 = call nnan float @llvm.maxnum.f32(float %x, float %y) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %z) + %tmp3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %tmp3, ptr addrspace(1) %arg ret void } @@ -638,10 +638,18 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x ; GFX12-NEXT: v_med3_num_f32 v2, v2, v3, v4 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-NEXT: s_setpc_b64 s[30:31] - %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y) - %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y) - %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z) - %tmp3 = call float @llvm.maximumnum.f32(float %tmp0, float %tmp2) +; +; GFX1250-LABEL: test_med3_minimumnum_maximumnum_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4 +; GFX1250-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %tmp0 = call nnan float @llvm.minimumnum.f32(float %x, float %y) + %tmp1 = call nnan float @llvm.maximumnum.f32(float %x, float %y) + %tmp2 = call nnan float @llvm.minimumnum.f32(float %tmp1, float %z) + %tmp3 = call nnan float @llvm.maximumnum.f32(float %tmp0, float %tmp2) store float %tmp3, ptr addrspace(1) %arg ret void } @@ -798,7 +806,7 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b ; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s5, s4 ; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s4, s3 ; SDAG-GFX1250-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l -; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[4:5] +; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5] ; SDAG-GFX1250-TRUE16-NEXT: s_endpgm ; ; SDAG-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false: @@ -813,12 +821,12 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b ; GISEL-GFX1250-TRUE16-LABEL: s_test_minmax_f16_ieee_false: ; GISEL-GFX1250-TRUE16: ; %bb.0: ; GISEL-GFX1250-TRUE16-NEXT: s_max_num_f16 s0, s0, s1 +; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s6, s3 ; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s7, s4 -; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX1250-TRUE16-NEXT: s_min_num_f16 s0, s0, s2 -; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v0, s0 -; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[6:7] +; GISEL-GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 +; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7] ; GISEL-GFX1250-TRUE16-NEXT: s_endpgm ; ; GISEL-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false: @@ -1246,7 +1254,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 ; SDAG-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; SDAG-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0 ; SDAG-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l -; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2 +; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off ; SDAG-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31] ; ; SDAG-GFX1250-FAKE16-LABEL: test_med3_f16: @@ -1262,7 +1270,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 ; GISEL-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GISEL-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l -; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2 +; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off ; GISEL-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31] ; ; GISEL-GFX1250-FAKE16-LABEL: test_med3_f16: @@ -1272,10 +1280,10 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 ; GISEL-GFX1250-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4 ; GISEL-GFX1250-FAKE16-NEXT: global_store_b16 v[0:1], v2, off ; GISEL-GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31] - %tmp0 = call half @llvm.minnum.f16(half %x, half %y) - %tmp1 = call half @llvm.maxnum.f16(half %x, half %y) - %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z) - %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2) + %tmp0 = call nnan half @llvm.minnum.f16(half %x, half %y) + %tmp1 = call nnan half @llvm.maxnum.f16(half %x, half %y) + %tmp2 = call nnan half @llvm.minnum.f16(half %tmp1, half %z) + %tmp3 = call nnan half @llvm.maxnum.f16(half %tmp0, half %tmp2) store half %tmp3, ptr addrspace(1) %arg ret void } |
