diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/combine-vpmadd52.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-vpmadd52.ll | 400 |
1 files changed, 400 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll new file mode 100644 index 000000000000..2cb060ea92b1 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll @@ -0,0 +1,400 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX + +define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test1_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test1_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test2_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test2_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and) + ret <2 x i64> %1 +} + +define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + 
%or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_bits: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_bits: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1 + %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_op: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_op: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52h: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: 
test_vpmadd52h: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +; Test the fold x * 0 + y -> y +define <2 x i64> @test_vpmadd52l_mul_zero(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_zero(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52h_mul_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_commuted(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 0, i64 0>) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_both(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_both: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_in_52bits(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_in_52bits: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + ; mul by (1 << 52) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 4503599627370496), <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_add_zero(<2 x 
i64> %x0, <2 x i64> %x1) { +; AVX512-LABEL: test_vpmadd52l_add_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vmovdqa %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_add_zero: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa %xmm2, %xmm0 +; AVX-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> <i64 0, i64 0>, <2 x i64> %x0, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_scalar(<2 x i64> %x0, <2 x i64> %x1) { +; AVX512-LABEL: test_vpmadd52l_mul_zero_scalar: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_zero_scalar: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 123>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +; (1 << 51) * (1 << 1) -> 1 << 52 -> low 52 bits are zeroes +define <2 x i64> @test_vpmadd52l_mul_lo52_zero(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52l_mul_lo52_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2)) + ret <2 x i64> %1 +} + +; (1 << 25) * (1 << 26) = 1 << 51 -> high 52 bits are zeroes +define <2 x i64> @test_vpmadd52h_mul_hi52_zero(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52h_mul_hi52_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 33554432), <2 x i64> splat (i64 67108864)) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_const(<2 x i64> %x0) { +; AVX512-LABEL: test_vpmadd52l_mul_lo52_const: +; AVX512: # %bb.0: +; 
AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_lo52_const: +; AVX: # %bb.0: +; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 123), <2 x i64> splat (i64 456)) + ret <2 x i64> %1 +} + +; (1 << 51) * (1 << 51) -> 1 << 102 -> the high 52 bits is 1 << 50 +define <2 x i64> @test_vpmadd52h_mul_hi52_const(<2 x i64> %x0) { +; AVX512-LABEL: test_vpmadd52h_mul_hi52_const: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h_mul_hi52_const: +; AVX: # %bb.0: +; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2251799813685248)) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test_vpmadd52l_mul_lo52_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_hi52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test_vpmadd52h_mul_hi52_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %and1 = lshr <2 x i64> %x0, splat (i64 40) + %and2 = lshr <2 x i64> %x1, splat (i64 40) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_mask_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: 
test_vpmadd52l_mul_lo52_mask_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_lo52_mask_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 2097152) ; 1LL << 21 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52h_mul_hi52_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlq $30, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $43, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52huq %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h_mul_hi52_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpsrlq $30, %xmm0, %xmm2 +; AVX-NEXT: vpsrlq $43, %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq + %and1 = lshr <2 x i64> %x0, splat (i64 30) + %and2 = lshr <2 x i64> %x1, splat (i64 43) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test1_knownbits_vpmadd52l: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 4) + %and2 = and <2 x i64> %x1, splat (i64 4) + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x 
i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test1_knownbits_vpmadd52h: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 3) + ret <2 x i64> %ret +} + +define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test2_knownbits_vpmadd52l: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1234,1234] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26 + %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1234) + ret <2 x i64> %ret +} + +define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test2_knownbits_vpmadd52h: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + ; add (1LL << 10) + 1 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_knownbits_vpmadd52l_negative: +; AVX512: # %bb.0: +; 
AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX512-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm2, %xmm3 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm3 +; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_knownbits_vpmadd52l_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa %xmm2, %xmm3 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1 + %or = or <2 x i64> %x1, splat (i64 1) + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_knownbits_vpmadd52h_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX512-NEXT: vmovdqa %xmm2, %xmm3 +; AVX512-NEXT: vpmadd52huq %xmm1, %xmm0, %xmm3 +; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_knownbits_vpmadd52h_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vmovdqa %xmm2, %xmm3 +; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm0, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + ; 
add 1; (1LL << 22) * (1LL << 30) = 1LL << 52, so the high 52 bits can be 1 and bit 0 of the sum is unknown + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} |
