diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /clang/test/CodeGen/X86 | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format [users/mingmingl-llvm/samplefdo-profile-format]
Diffstat (limited to 'clang/test/CodeGen/X86')
70 files changed, 2247 insertions, 557 deletions
diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c index fac41ea6c214..cf3d5dbc81a7 100644 --- a/clang/test/CodeGen/X86/amx_avx512_api.c +++ b/clang/test/CodeGen/X86/amx_avx512_api.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \ +// RUN: -target-feature +amx-avx512 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c index d60929994901..dada53c2d6f8 100644 --- a/clang/test/CodeGen/X86/amxavx512-builtins.c +++ b/clang/test/CodeGen/X86/amxavx512-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \ -// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s +// RUN: -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index e0a21c2ac0d2..f255dbe1b2ad 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -9,6 +9,17 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: 
%clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -88,12 +99,14 @@ __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_blendv_pd(V1, V2, V3); } +TEST_CONSTEXPR(match_m256d(_mm256_blendv_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0},(__m256d)(__v4df){-100.0, -101.0, -102.0, -103.0},(__m256d)(__v4df){0.0, -1.0, 1.0, -1.0}), 1.0f, -101.0, 3.0, -103.0)); __m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) { // CHECK-LABEL: test_mm256_blendv_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_blendv_ps(V1, V2, V3); } +TEST_CONSTEXPR(match_m256(_mm256_blendv_ps((__m256)(__v8sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f},(__m256)(__v8sf){-100.0f, -101.0f, -102.0f, -103.0f, -104.0f, -105.0f, -106.0f, -107.0f},(__m256)(__v8sf){-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, -0.0f}), -100.0f, 1.0f, -102.0f, 3.0f, -104.0f, -105.0f, 6.0f, -107.0f)); __m256d test_mm256_broadcast_pd(__m128d* A) { // CHECK-LABEL: test_mm256_broadcast_pd @@ -923,12 +936,16 @@ __m256d test_mm256_cvtepi32_pd(__m128i A) { return _mm256_cvtepi32_pd(A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtepi32_pd((__m128i)(__v4si){-2, -1, 0, 1}), -2.0, -1.0, 0.0, 1.0)); + __m256 test_mm256_cvtepi32_ps(__m256i A) { // CHECK-LABEL: test_mm256_cvtepi32_ps // CHECK: sitofp <8 x 
i32> %{{.*}} to <8 x float> return _mm256_cvtepi32_ps(A); } +TEST_CONSTEXPR(match_m256(_mm256_cvtepi32_ps((__m256i)(__v8si){-8, -4, -2, -1, 0, 1, 2, 4}), -8.0f, -4.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 4.0f)); + __m128i test_mm256_cvtpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvtpd_epi32 // CHECK: call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %{{.*}}) @@ -953,6 +970,8 @@ __m256d test_mm256_cvtps_pd(__m128 A) { return _mm256_cvtps_pd(A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtps_pd((__m128){0.25f, 1.75f, -1.75f, 16777216.0f}), 0.25, 1.75, -1.75, 16777216.0)); + double test_mm256_cvtsd_f64(__m256d __a) { // CHECK-LABEL: test_mm256_cvtsd_f64 // CHECK: extractelement <4 x double> %{{.*}}, i32 0 @@ -2134,6 +2153,7 @@ __m256d test_mm256_zextpd128_pd256(__m128d A) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_zextpd128_pd256(A); } +TEST_CONSTEXPR(match_m256d(_mm256_zextpd128_pd256((__m128d){-4.0, -5.0}), -4.0, -5.0, +0.0, +0.0)); __m256 test_mm256_zextps128_ps256(__m128 A) { // CHECK-LABEL: test_mm256_zextps128_ps256 @@ -2141,6 +2161,7 @@ __m256 test_mm256_zextps128_ps256(__m128 A) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm256_zextps128_ps256(A); } +TEST_CONSTEXPR(match_m256(_mm256_zextps128_ps256((__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m256i test_mm256_zextsi128_si256(__m128i A) { // CHECK-LABEL: test_mm256_zextsi128_si256 @@ -2148,3 +2169,4 @@ __m256i test_mm256_zextsi128_si256(__m128i A) { // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> return _mm256_zextsi128_si256(A); } +TEST_CONSTEXPR(match_m256i(_mm256_zextsi128_si256((__m128i){1, 2}), 1, 2, 0, 0)); diff --git a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c index 
78405d63984f..25559d6cbfe9 100644 --- a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c index 3ac7c2cc8716..7fbdcdc2d18c 100644 --- a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c index 
4e467b36b234..fa338253f58d 100644 --- a/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512minmax-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c index 2ee496d317a5..ddc4d8df28ea 100644 --- a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c +++ b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -Wno-invalid-feature-combination -verify -fsyntax-only -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -Wno-invalid-feature-combination -verify -fsyntax-only #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c index d143188ee0f3..728c9f5652dd 100644 --- a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i686 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i686 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c index 0a1c32914439..ac1b8cd88db0 100755 --- a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c index 46d476484407..2343016d7249 100644 --- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown 
-target-feature +avx10.2 -Wall -Werror -verify #include <immintrin.h> #include <stddef.h> @@ -49,4 +49,4 @@ __m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask8 U, __m512 A) { __m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) { return _mm512_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}} -}
\ No newline at end of file +} diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c index 334edfb501e2..f85d8f4d94a6 100755 --- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -Wall -Werror -verify +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2 -Wall -Werror -verify #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c index 1aaa6544d1f9..a5296820bc68 100644 --- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c index c1b6df3cb07f..a5e8121bddc9 100644 --- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s 
--check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c index 3cc9e1d4233b..c7fea07421b5 100644 --- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2convert-builtins.c b/clang/test/CodeGen/X86/avx10_2convert-builtins.c index 31dd0ecc381e..025ab77b4bae 100644 --- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination 
-Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c index 7dad153a15c3..97faaf1ef4e1 100644 --- a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c index b4b12c953194..a250d91ae598 100644 --- a/clang/test/CodeGen/X86/avx10_2ni-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i686 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 
-flax-vector-conversions=none -ffreestanding %s -triple=i686 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c index 7f30befefffe..12b845cda44b 100644 --- a/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2 \ // RUN: -Wno-invalid-feature-combination -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-256 \ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2 \ // RUN: -Wno-invalid-feature-combination -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c index 070065228e5a..bcfe43b1f34a 100644 --- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c +++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c index 86e9df44c066..a0e5cfdcd5ec 100644 --- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c @@ -1,5 +1,5 @@ -// 
RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 29cb3e8860be..aeb1aee4ea94 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -7,6 +7,14 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 #include <immintrin.h> #include "builtin_test_helpers.h" @@ -18,18 +26,21 @@ __m256i test_mm256_abs_epi8(__m256i a) { // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) return _mm256_abs_epi8(a); } +TEST_CONSTEXPR(match_v32qi(_mm256_abs_epi8((__m256i)(__v32qs){0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 50, 100, 20, 80, 50, 120, 20, 100, 50, 100, 20, 80, 50, 120, 20)); __m256i test_mm256_abs_epi16(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi16 // CHECK: [[ABS:%.*]] = call <16 
x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) return _mm256_abs_epi16(a); } +TEST_CONSTEXPR(match_v16hi(_mm256_abs_epi16((__m256i)(__v16hi){+5, -3, -32767, +32767, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129}), 5, 3, 32767, 32767, 10, 8, 0, 256, 256, 128, 3, 9, 15, 33, 63, 129)); __m256i test_mm256_abs_epi32(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi32 // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) return _mm256_abs_epi32(a); } +TEST_CONSTEXPR(match_v8si(_mm256_abs_epi32((__m256i)(__v8si){+5, -3, -2147483647, +2147483647, 0, -256, +256, +1025}), 5, 3, 2147483647, 2147483647, 0, 256, 256, 1025)); __m256i test_mm256_add_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi8 @@ -117,12 +128,14 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) { // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_avg_epu8(a, b); } +TEST_CONSTEXPR(match_v32qu(_mm256_avg_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); __m256i test_mm256_avg_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu16 // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_avg_epu16(a, b); } +TEST_CONSTEXPR(match_v16hu(_mm256_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); // FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar) // functions to this IR. 
In the future we could delete the corresponding @@ -153,6 +166,7 @@ __m256i test_mm256_blendv_epi8(__m256i a, __m256i b, __m256i m) { // CHECK: call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_blendv_epi8(a, b, m); } +TEST_CONSTEXPR(match_v32qi(_mm256_blendv_epi8((__m256i)(__v32qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},(__m256i)(__v32qs){-90,-91,-92,-93,-94,-95,-96,-97,-98,-99,-100,-101,-12,-13,-104,-105,-106,-107,-108,-109,-100,-101,-12,-13,-104,-105,-106,-107,-108,-109,-120,-121},(__m256i)(__v32qs){0,0,0,-1,0,-1,-1,0,0,0,-1,-1,0,-1,0,0,0,0,0,0,0,0,0,-1,-1,-1,0,0,0,0,0,-1}), 0, 1, 2, -93, 4, -95, -96, 7, 8, 9, -100, -101, 12, -13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -13, -104, -105, 26, 27, 28, 29, 30, -121)); __m128i test_mm_broadcastb_epi8(__m128i a) { // CHECK-LABEL: test_mm_broadcastb_epi8 @@ -279,48 +293,62 @@ __m256i test_mm256_cmpeq_epi8(__m256i a, __m256i b) { // CHECK: icmp eq <32 x i8> return _mm256_cmpeq_epi8(a, b); } +TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8( + (__m128i)(__v16qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16}, + (__m128i)(__v16qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}), + 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1)); __m256i test_mm256_cmpeq_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpeq_epi16 // CHECK: icmp eq <16 x i16> return _mm256_cmpeq_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_cmpeq_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-10, -2, +6, -4, +5, -12, +14, -8, +9, -20, +22, -12, +26, -14, +30, -16}), 0, -1, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1)); __m256i test_mm256_cmpeq_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpeq_epi32 // CHECK: icmp eq <8 x i32> return _mm256_cmpeq_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_cmpeq_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, 
(__m256i)(__v8si){-10, -2, +6, -4, +5, -12, +14, -8}), 0, -1, 0, -1, -1, 0, 0, -1)); __m256i test_mm256_cmpeq_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpeq_epi64 // CHECK: icmp eq <4 x i64> return _mm256_cmpeq_epi64(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_cmpeq_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-10, -2, +6, -4}), 0, -1, 0, -1)); __m256i test_mm256_cmpgt_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpgt_epi8 // CHECK: icmp sgt <32 x i8> return _mm256_cmpgt_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_cmpgt_epi8( + (__m256i)(__v32qs){1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16}, + (__m256i)(__v32qs){10, -2, 6, -5, 30, -7, 8, -1, 20, -3, 12, -8, 25, -10, 9, -2, -10, 2, -6, 5, -30, 7, -8, 1, -20, 3, -12, 8, -25, 10, -9, 2}), + 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1)); __m256i test_mm256_cmpgt_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpgt_epi16 // CHECK: icmp sgt <16 x i16> return _mm256_cmpgt_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_cmpgt_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v16hi){-10, -2, +6, -5, +30, -7, +8, -1, -10, -2, +6, -5, +30, -7, +8, -1}), -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0)); __m256i test_mm256_cmpgt_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpgt_epi32 // CHECK: icmp sgt <8 x i32> return _mm256_cmpgt_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_cmpgt_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-10, -2, +6, -5, +30, -7, +8, -1}), -1, 0, 0, -1, 0, -1, 0, 0)); __m256i test_mm256_cmpgt_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpgt_epi64 // CHECK: icmp sgt <4 x i64> return _mm256_cmpgt_epi64(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_cmpgt_epi64((__m256i)(__v4di){+1, -2, +3, -4}, 
(__m256i)(__v4di){-10, -2, +6, -5}), -1, 0, 0, -1)); __m256i test_mm256_cvtepi8_epi16(__m128i a) { // CHECK-LABEL: test_mm256_cvtepi8_epi16 @@ -821,72 +849,96 @@ __m256i test_mm256_max_epi8(__m256i a, __m256i b) { return _mm256_max_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_max_epi8((__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, +17, +18, +19, +20, +21, +22, +23, +24, +25, +26, +27, +28, +29, +30, +31, +32)); + __m256i test_mm256_max_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epi16 // CHECK: call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_max_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_max_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16)); + __m256i test_mm256_max_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epi32 // CHECK: call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_max_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, +5, +6, +7, +8)); + __m256i test_mm256_max_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu8 // CHECK: call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_max_epu8(a, b); } +TEST_CONSTEXPR(match_v32qu(_mm256_max_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m256i test_mm256_max_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu16 // CHECK: call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_max_epu16(a, b); } +TEST_CONSTEXPR(match_v16hu(_mm256_max_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); + __m256i test_mm256_max_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu32 // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epu32(a, b); } +TEST_CONSTEXPR(match_v8su(_mm256_max_epu32((__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 1, 2, 3, 4, 5, 6, 7, 8)); + __m256i test_mm256_min_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi8 // CHECK: call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_min_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_min_epi8((__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32)); + __m256i test_mm256_min_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi16 // CHECK: 
call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_min_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_min_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16)); + __m256i test_mm256_min_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epi32 // CHECK: call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_min_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_min_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, -5, -6, -7, -8)); + __m256i test_mm256_min_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu8 // CHECK: call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_min_epu8(a, b); } +TEST_CONSTEXPR(match_v32qu(_mm256_min_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); + __m256i test_mm256_min_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu16 // CHECK: call <16 x i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_min_epu16(a, b); } +TEST_CONSTEXPR(match_v16hu(_mm256_min_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); + __m256i test_mm256_min_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_min_epu32 // CHECK: 
call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_min_epu32(a, b); } +TEST_CONSTEXPR(match_v8su(_mm256_min_epu32((__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 0, 1, 2, 3, 4, 5, 6, 7)); + int test_mm256_movemask_epi8(__m256i a) { // CHECK-LABEL: test_mm256_movemask_epi8 // CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) @@ -951,6 +1003,7 @@ __m256i test_mm256_mullo_epi32(__m256i a, __m256i b) { // CHECK: mul <8 x i32> return _mm256_mullo_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_mullo_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 28, 36, -40, -40, -36, -28, -16)); __m256i test_mm256_or_si256(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_or_si256 @@ -1066,6 +1119,11 @@ __m256i test_mm256_slli_epi16(__m256i a) { // CHECK: call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %{{.*}}, i32 %{{.*}}) return _mm256_slli_epi16(a, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 15), 0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_slli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 17), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i 
test_mm256_slli_epi16_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_slli_epi16_2 @@ -1078,6 +1136,11 @@ __m256i test_mm256_slli_epi32(__m256i a) { // CHECK: call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %{{.*}}, i32 %{{.*}}) return _mm256_slli_epi32(a, 3); } +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe)); +TEST_CONSTEXPR(match_v8su(_mm256_slli_epi32((__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}, 31), 0, 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_slli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 33), 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_slli_epi32_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_slli_epi32_2 @@ -1090,6 +1153,11 @@ __m256i test_mm256_slli_epi64(__m256i a) { // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_slli_epi64(a, 3); } +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 1), 0, 0x2, 0x4, 0x6)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 33), 0, 0x200000000LL, 0x400000000LL, 0x600000000LL)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 64), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_slli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 65), 0, 0, 0, 0)); __m256i test_mm256_slli_epi64_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_slli_epi64_2 @@ -1148,6 +1216,7 @@ __m256i test_mm256_srai_epi16(__m256i a) { // CHECK: call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %{{.*}}, i32 %{{.*}}) return _mm256_srai_epi16(a, 3); } 
+TEST_CONSTEXPR(match_v16hi(_mm256_srai_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1)); __m256i test_mm256_srai_epi16_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srai_epi16_2 @@ -1160,6 +1229,7 @@ __m256i test_mm256_srai_epi32(__m256i a) { // CHECK: call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %{{.*}}, i32 %{{.*}}) return _mm256_srai_epi32(a, 3); } +TEST_CONSTEXPR(match_v8si(_mm256_srai_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1)); __m256i test_mm256_srai_epi32_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srai_epi32_2 @@ -1204,6 +1274,7 @@ __m256i test_mm256_srli_epi16(__m256i a) { // CHECK: call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %{{.*}}, i32 %{{.*}}) return _mm256_srli_epi16(a, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_srli_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7)); __m256i test_mm256_srli_epi16_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srli_epi16_2 @@ -1216,6 +1287,7 @@ __m256i test_mm256_srli_epi32(__m256i a) { // CHECK: call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %{{.*}}, i32 %{{.*}}) return _mm256_srli_epi32(a, 3); } +TEST_CONSTEXPR(match_v8si(_mm256_srli_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 31), 0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_srli_epi32_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srli_epi32_2 @@ -1228,6 +1300,7 @@ __m256i test_mm256_srli_epi64(__m256i a) { // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_srli_epi64(a, 3); } +TEST_CONSTEXPR(match_v4di(_mm256_srli_epi64((__m256i)(__v4di){0, 1, 2, 3}, 33), 0, 0x0, 0x0, 0x0)); __m256i test_mm256_srli_epi64_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srli_epi64_2 diff --git 
a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c index 645126916572..e76d0d7a4551 100644 --- a/clang/test/CodeGen/X86/avx512-error.c +++ b/clang/test/CodeGen/X86/avx512-error.c @@ -1,12 +1,9 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1 -emit-llvm -o /dev/null -verify #include <immintrin.h> -// No error emitted whether we have "evex512" feature or not. -__attribute__((target("avx512bw,no-evex512"))) +__attribute__((target("avx512bw"))) __mmask64 k64_verify_1(__mmask64 a) { return _knot_mask64(a); // expected-no-diagnostics } @@ -15,19 +12,12 @@ __mmask64 k64_verify_2(__mmask64 a) { return _knot_mask64(a); // expected-no-diagnostic } -__attribute__((target("avx512bw,evex512"))) +__attribute__((target("avx512bw"))) __m512d zmm_verify_ok(__m512d a) { - // No error emitted if we have "evex512" feature. 
return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic } __m512d zmm_error(__m512d a) { // CHECK-LABEL: @test_mm512_sqrt_pd - return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // noevex-error {{'__builtin_ia32_sqrtpd512' needs target feature avx512f,evex512}} + return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // noevex-error {{'__builtin_ia32_sqrtpd512' needs target feature avx512f}} } -#if defined(__AVX10_1__) && !defined(__AVX10_1_512__) -// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}} -// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}} -// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}} -// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}} -#endif diff --git a/clang/test/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CodeGen/X86/avx512-reduceIntrin.c index 4069b463e3f1..598bca4c538b 100644 --- a/clang/test/CodeGen/X86/avx512-reduceIntrin.c +++ b/clang/test/CodeGen/X86/avx512-reduceIntrin.c @@ -3,6 +3,12 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 
-emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c index 0110079a007d..309fc289abc7 100644 --- a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c index 8092f2d30214..3ac8674421d9 100644 --- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c @@ -3,6 +3,12 @@ // 
RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 02cedc3c73fb..9d605efcbd75 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding 
%s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -834,6 +839,7 @@ __m512i test_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __ //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_mullo_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_mullo_epi16((__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}, 0x0000FFFF, (__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, -132, -156, -182, -210, -240, -272, -306, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32)); __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i 
__B) { //CHECK-LABEL: test_mm512_maskz_mullo_epi16 @@ -841,6 +847,7 @@ __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_mullo_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mullo_epi16(0x0000FFFF, (__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, -132, -156, -182, -210, -240, -272, -306, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { // CHECK-LABEL: test_mm512_mask_blend_epi8 @@ -857,35 +864,47 @@ __m512i test_mm512_abs_epi8(__m512i __A) { // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) return _mm512_abs_epi8(__A); } +TEST_CONSTEXPR(match_v64qi(_mm512_abs_epi8((__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 100, 50, 100, 20, 80, 50, 120, 20, 100, 50, 100, 20, 80, 50, 120, 20)); + __m512i test_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_abs_epi8 // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) // CHECK: select <64 x i1> %{{.*}}, 
<64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_mask_abs_epi8(__W,__U,__A); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_abs_epi8((__m512i)(__v64qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask64)0x000000000000001, (__m512i)(__v64qi){(char)-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); + __m512i test_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_abs_epi8 // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_maskz_abs_epi8(__U,__A); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_abs_epi8((__mmask64)0x000000000000001, (__m512i)(__v64qi){(char)-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_abs_epi16(__m512i __A) { // CHECK-LABEL: test_mm512_abs_epi16 // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) return _mm512_abs_epi16(__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_abs_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, 
+5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 32000, 32000, 32000, 32000)); + __m512i test_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_abs_epi16 // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_mask_abs_epi16(__W,__U,__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_abs_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask32)0x00000001, (__m512i)(__v32hi){-1000, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}), 1000, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); + __m512i test_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_abs_epi16 // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_maskz_abs_epi16(__U,__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_abs_epi16((__mmask32)0x00000001, (__m512i)(__v32hi){-1000, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}), 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_packs_epi32 // CHECK: @llvm.x86.avx512.packssdw.512 @@ -1041,171 +1060,255 @@ __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pavg.b.512 return _mm512_avg_epu8(__A,__B); } 
+TEST_CONSTEXPR(match_v64qu(_mm512_avg_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_avg_epu8((__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_avg_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 return _mm512_avg_epu16(__A,__B); } +TEST_CONSTEXPR(match_v32hu(_mm512_avg_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: 
test_mm512_mask_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_avg_epu16((__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_avg_epu16(0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_max_epi8(__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_max_epi8((__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, 
+47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + __m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epi8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_max_epi8(0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 
__M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_mask_max_epi8((__m512i)(__v64qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64}, 0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64)); + __m512i test_mm512_max_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_max_epi16(__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_max_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, 
-18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, +17, +18, +19, +20, +21, +22, +23, +24, +25, +26, +27, +28, +29, +30, +31, +32)); + __m512i test_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_max_epi16(0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_mask_max_epi16((__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}, 0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, 
+27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32)); + __m512i test_mm512_max_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_max_epu8(__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_max_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_max_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_maskz_max_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_max_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_mask_max_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, 0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 
57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_max_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_max_epu16(__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_max_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_max_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_maskz_max_epu16(0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_max_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_mask_max_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, 0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_min_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_min_epi8(__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_min_epi8((__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63)); + __m512i test_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return 
_mm512_maskz_min_epi8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_min_epi8(0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_mask_min_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qi(_mm512_mask_min_epi8((__m512i)(__v64qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64}, 0x00000000FFFFFFFF, (__m512i)(__v64qs){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, 
+45, -46, +47, -48, +49, -50, +51, -52, +53, -54, +55, -56, +57, -58, +59, -60, +61, -62, +63}, (__m512i)(__v64qs){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63}), 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, +48, -49, +50, -51, +52, -53, +54, -55, +56, -57, +58, -59, +60, -61, +62, -63, +64)); + __m512i test_mm512_min_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_min_epi16(__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_min_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32)); + __m512i test_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_min_epi16(0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, 
-16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epi16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hi(_mm512_mask_min_epi16((__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}, 0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32)); + __m512i test_mm512_min_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_min_epu8(__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_min_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, 
(__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + __m512i test_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return _mm512_maskz_min_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_maskz_min_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epu8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> {{.*}}, <64 x i8> [[RES]], <64 x i8> {{.*}} return 
_mm512_mask_min_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v64qu(_mm512_mask_min_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, 0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_min_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_min_epu16(__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_min_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); + __m512i test_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> 
@llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_maskz_min_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_maskz_min_epu16(0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epu16 // CHECK: [[RES:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}) // CHECK: select <32 x i1> {{.*}}, <32 x i16> [[RES]], <32 x i16> {{.*}} return _mm512_mask_min_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32hu(_mm512_mask_min_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, 0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shuffle_epi8 // CHECK: @llvm.x86.avx512.pshuf.b.512 @@ -1653,6 +1756,7 @@ __m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.psllv.w.512( return _mm512_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_sllv_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 
76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 528, 0, 2176, 4416, -8960, 18176, 0, 9216, 20480, 20480, 0, -16384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_sllv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_sllv_epi16 @@ -1660,6 +1764,7 @@ __m512i test_mm512_mask_sllv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_sllv_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 999, 0, 999, 4416, -8960, 999, 0, 9216, 20480, 999, 999, -16384, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, 0)); __m512i test_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_sllv_epi16 @@ -1667,6 +1772,7 @@ __m512i test_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_sllv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sllv_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 
87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -128, 0, 0, 0, 0, 4416, -8960, 0, 0, 9216, 20480, 0, 0, -16384, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sll_epi16 @@ -1693,6 +1799,11 @@ __m512i test_mm512_slli_epi16(__m512i __A) { // CHECK: @llvm.x86.avx512.pslli.w.512 return _mm512_slli_epi16(__A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 0), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 1), 0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 15), 0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000, 0x0, 0x8000)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_slli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 17), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_slli_epi16_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_slli_epi16_2 @@ -1706,6 +1817,7 @@ __m512i test_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_slli_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_slli_epi16((__m512i)(__v32hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131}, (__mmask32)~(__mmask32)0, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 1), 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e)); __m512i test_mm512_mask_slli_epi16_2(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_slli_epi16_2 @@ -1720,6 +1832,11 @@ __m512i test_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_slli_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x00ffcc71, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0xffffffff, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x7fffffff, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_slli_epi16((__mmask32)0x71ccff00, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0, 0, 0x4, 0x6, 0, 0, 0xc, 0xe, 0x10, 0, 0, 0, 0x18, 0x1a, 0x1c, 0)); __m512i test_mm512_maskz_slli_epi16_2(__mmask32 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_slli_epi16_2 @@ -1739,6 +1856,7 @@ __m512i test_mm512_srlv_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.psrlv.w.512( return _mm512_srlv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_srlv_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 8, 0, 2, 1, 511, 0, 0, 0, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 
__m512i test_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_srlv_epi16 @@ -1746,6 +1864,7 @@ __m512i test_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srlv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_srlv_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 999, 0, 999, 1, 511, 999, 0, 0, 0, 999, 999, 3, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, 0)); __m512i test_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_srlv_epi16 @@ -1753,12 +1872,14 @@ __m512i test_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srlv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srlv_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), 32736, 0, 0, 0, 0, 1, 511, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_srav_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: 
test_mm512_srav_epi16 // CHECK: @llvm.x86.avx512.psrav.w.512( return _mm512_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_srav_epi16((__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 0, 8, -1, 2, 1, -1, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, -1, -1, 0, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, -1, -1)); __m512i test_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_srav_epi16 @@ -1766,6 +1887,7 @@ __m512i test_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srav_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_srav_epi16((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 0, 999, -1, 999, 1, -1, 999, -1, 0, 0, 999, 999, -1, 0, 999, 999, 999, 999, 999, 999, 0, 999, 999, 0, 999, 999, 999, 0, 0, 999, -1)); __m512i test_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_srav_epi16 @@ -1773,6 +1895,7 @@ __m512i test_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srav_epi16(__U, __A, __B); } 
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srav_epi16(0xB120676B, (__m512i)(__v32hi){ -64, 65, 66, -67, 68, 69, -70, 71, -72, 73, 74, -75, 76, -77, 78, 79, -80, -81, -82, 83, -84, 85, 86, 87, 88, -89, -90, 91, 92, 93, -94, -95}, (__m512i)(__v32hi){ 1, -2, 3, -4, 5, 6, 7, 8, -9, 10, 11, 12, -13, 14, -15, -16, 17, 18, -19, -20, -21, 22, -23, 24, 25, -26, 27, -28, -29, -30, -31, -32}), -32, 0, 0, -1, 0, 1, -1, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1)); __m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sra_epi16 @@ -1799,6 +1922,7 @@ __m512i test_mm512_srai_epi16(__m512i __A) { // CHECK: @llvm.x86.avx512.psrai.w.512 return _mm512_srai_epi16(__A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_srai_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 10), 0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_srai_epi16_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srai_epi16_2 @@ -1812,6 +1936,7 @@ __m512i test_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srai_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_srai_epi16((__m512i)(__v32hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131}, (__mmask32)~(__mmask32)0, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 1), 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, 0xe, 0xf, 0xf)); __m512i 
test_mm512_mask_srai_epi16_2(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srai_epi16_2 @@ -1826,6 +1951,7 @@ __m512i test_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srai_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srai_epi16((__mmask32)0xAAAAAAAA, (__m512i)(__v32hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -1234, 1234, -32767, 32766, -5, 5, -256, 256, -42, 42, -7, 7, -30000, 30000, -1, -1, 0, -2, 2, -32768, 32767, -32768, -123, 123 }, 5), 0, 1023, 0, -1, 0, 0, 0, 0, 0, 38, 0, 1023, 0, 0, 0, 8, 0, 1, 0, 0, 0, 937, 0, -1, 0, -1, 0, -1024, 0, -1024, 0, 3 )); __m512i test_mm512_maskz_srai_epi16_2(__mmask32 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_srai_epi16_2 @@ -1859,6 +1985,7 @@ __m512i test_mm512_srli_epi16(__m512i __A) { // CHECK: @llvm.x86.avx512.psrli.w.512 return _mm512_srli_epi16(__A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_srli_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 15), 0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_srli_epi16_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srli_epi16_2 @@ -1872,6 +1999,7 @@ __m512i test_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srli_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_srli_epi16((__m512i)(__v32hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131}, (__mmask32)~(__mmask32)0, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 1), 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, 0xe, 0xf, 0xf)); __m512i test_mm512_mask_srli_epi16_2(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srli_epi16_2 @@ -1886,6 +2014,7 @@ __m512i test_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srli_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srli_epi16((__mmask32)0x71ccff00, (__m512i)(__v32hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0, 0, 0, 0, 0, 0, 0, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0, 0, 0x1, 0x1, 0, 0, 0x3, 0x3, 0x4, 0, 0, 0, 0x6, 0x6, 0x7, 0 )); __m512i test_mm512_maskz_srli_epi16_2(__mmask32 __U, __m512i __A, int __B) { // CHECK-LABEL: test_mm512_maskz_srli_epi16_2 diff --git a/clang/test/CodeGen/X86/avx512cd-builtins.c b/clang/test/CodeGen/X86/avx512cd-builtins.c index 45df0fccb322..b9d42b7dea23 100644 --- a/clang/test/CodeGen/X86/avx512cd-builtins.c +++ b/clang/test/CodeGen/X86/avx512cd-builtins.c @@ -2,9 +2,14 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_conflict_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_conflict_epi64 @@ -42,39 +47,70 @@ __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { } __m512i test_mm512_lzcnt_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_lzcnt_epi32 - // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer + // CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_lzcnt_epi32(__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 30, 29, 28, 27, 26, 25, 24, 30, 29, 29, 29, 28, 28, 28, 28)); +TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32)); + __m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_lzcnt_epi32 - // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 
true) + // CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer + // CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_lzcnt_epi32(__W,__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_mask_lzcnt_epi32(_mm512_set1_epi32(32), /*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 32, 29, 28, 32, 26, 32, 24, 32, 32, 29, 29, 32, 28, 32, 28)); + __m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_lzcnt_epi32 - // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer + // CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_lzcnt_epi32(__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_maskz_lzcnt_epi32(/*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 0, 29, 28, 0, 26, 0, 24, 0, 0, 29, 29, 0, 28, 0, 28)); + __m512i test_mm512_lzcnt_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_lzcnt_epi64 - // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_lzcnt_epi64(__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 60, 59, 58, 57, 56)); +TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}), 64, 64, 64, 64, 64, 64, 64, 64)); + __m512i 
test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_lzcnt_epi64 - // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_lzcnt_epi64(__W,__U,__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_mask_lzcnt_epi64(_mm512_set1_epi64((long long) 64), /*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 64, 59, 64, 57, 64)); + __m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_lzcnt_epi64 - // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_lzcnt_epi64(__U,__A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_lzcnt_epi64(/*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 0, 59, 0, 57, 0)); + __m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) { // CHECK-LABEL: test_mm512_broadcastmb_epi64 // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c index 06f7507bde53..cf27e3073fda 100644 --- a/clang/test/CodeGen/X86/avx512copy-builtins.c +++ b/clang/test/CodeGen/X86/avx512copy-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \ +// RUN: %clang_cc1 %s 
-flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2 \ // RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 47e5a3ffeeff..df096e3607f3 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -243,6 +248,7 @@ __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) { // CHECK: mul <8 x i64> return (__m512i) _mm512_mullo_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mullo_epi64((__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, 
+3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -30, -42, +56, -72)); __m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_mullo_epi64 @@ -250,6 +256,7 @@ __m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return (__m512i) _mm512_mask_mullo_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_mullo_epi64((__m512i)(__v8di){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, -4, +5, -6, +7, -8, +9}), -2, -6, -12, -20, -500, +600, -700, +800)); __m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_mullo_epi64 @@ -257,6 +264,7 @@ __m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return (__m512i) _mm512_maskz_mullo_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_mullo_epi64(0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, 0, 0, 0, 0)); __m512d test_mm512_xor_pd (__m512d __A, __m512d __B) { // CHECK-LABEL: test_mm512_xor_pd @@ -594,6 +602,8 @@ __m512d test_mm512_cvtepi64_pd(__m512i __A) { return _mm512_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepi64_pd((__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 8.0, 8.0)); + __m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi64_pd // CHECK: sitofp <8 x i64> %{{.*}} to <8 x double> @@ -601,6 +611,8 @@ __m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return _mm512_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepi64_pd(_mm512_set1_pd(-777.0), /*1010 
0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -777.0, 2.0, -777.0, -777.0, -4.0, -777.0, 8.0)); + __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepi64_pd // CHECK: sitofp <8 x i64> %{{.*}} to <8 x double> @@ -608,6 +620,8 @@ __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { return _mm512_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepi64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, 0.0, 2.0, 0.0, 0.0, -4.0, 0.0, 8.0)); + __m512d test_mm512_cvt_roundepi64_pd(__m512i __A) { // CHECK-LABEL: test_mm512_cvt_roundepi64_pd // CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64 @@ -818,6 +832,8 @@ __m512d test_mm512_cvtepu64_pd(__m512i __A) { return _mm512_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepu64_pd((__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu64_pd // CHECK: uitofp <8 x i64> %{{.*}} to <8 x double> @@ -825,6 +841,8 @@ __m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return _mm512_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepu64_pd(_mm512_set1_pd(-777.0), /*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, -777.0, 2.0, -777.0, -777.0, 4.0, -777.0, 8.0)); + __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepu64_pd // CHECK: uitofp <8 x i64> %{{.*}} to <8 x double> @@ -832,6 +850,8 @@ __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { return _mm512_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepu64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 8.0)); + __m512d test_mm512_cvt_roundepu64_pd(__m512i __A) { // 
CHECK-LABEL: test_mm512_cvt_roundepu64_pd // CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 29916be23cba..f93216e546a6 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -608,6 +608,7 @@ __m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fmadd_pd(__A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 86.0, 106.0, 128.0, 152.0)); __m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_mask_fmadd_pd // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) @@ -615,6 +616,8 @@ __m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmadd_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 5.0, 6.0, 7.0, 8.0)); __m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { // CHECK-LABEL: 
test_mm512_mask3_fmadd_pd // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) @@ -622,19 +625,24 @@ __m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmadd_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 26.0, 38.0, 52.0, 68.0, 21.0, 22.0, 23.0, 24.0)); __m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_maskz_fmadd_pd // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> return _mm512_maskz_fmadd_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmadd_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmadd_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 
38.0, 52.0, 68.0, 0.0, 0.0, 0.0, 0.0)); __m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fmsub_pd(__A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 44.0, 62.0, 82.0, 104.0)); __m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_mask_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} @@ -643,20 +651,45 @@ __m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmsub_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 5.0, 6.0, 7.0, 8.0)); __m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_maskz_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // CHECK: 
select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> return _mm512_maskz_fmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmsub_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmsub_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 0.0, 0.0, 0.0, 0.0)); +__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm512_mask3_fmsub_pd + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask3_fmsub_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -8.0, 2.0, 14.0, 28.0, 21.0, 22.0, 23.0, 24.0)); __m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) 
return _mm512_fnmadd_pd(__A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, -44.0, -62.0, -82.0, -104.0)); +__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + // CHECK-LABEL: test_mm512_mask_fnmadd_pd + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_fnmadd_pd(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 5.0, 6.0, 7.0, 8.0)); __m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { // CHECK-LABEL: test_mm512_mask3_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} @@ -665,14 +698,18 @@ __m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmas // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, 
(__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 8.0, -2.0, -14.0, -28.0, 21.0, 22.0, 23.0, 24.0)); __m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_maskz_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmadd_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 00.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmadd_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 0.0, 0.0, 0.0, 0.0)); __m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} @@ -680,15 +717,40 @@ __m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fnmsub_pd(__A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 
10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, -86.0, -106.0, -128.0, -152.0)); +__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + // CHECK-LABEL: test_mm512_mask_fnmsub_pd + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_fnmsub_pd(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 5.0, 6.0, 7.0, 8.0)); +__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm512_mask3_fnmsub_pd + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: fneg <8 x double> %{{.*}} + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, 
(__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -26.0, -38.0, -52.0, -68.0, 21.0, 22.0, 23.0, 24.0)); __m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_maskz_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: fneg <8 x double> %{{.*}} // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmsub_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmsub_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 0.0, 0.0, 0.0, 0.0)); __m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_fmadd_round_ps // CHECK: @llvm.x86.avx512.vfmadd.ps.512 @@ -780,6 +842,7 @@ __m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fmadd_ps(__A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_fmadd_ps((__m512){1.0, 2.0, 
3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0)); __m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_mask_fmadd_ps // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) @@ -787,6 +850,8 @@ __m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmadd_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_mask_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0)); __m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { // CHECK-LABEL: test_mm512_mask3_fmadd_ps // CHECK: call 
{{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) @@ -794,19 +859,24 @@ __m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 _ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmadd_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m512(_mm512_mask3_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask3_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0)); __m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_maskz_fmadd_ps // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> return _mm512_maskz_fmadd_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_fmadd_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 
16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0)); +TEST_CONSTEXPR(match_m512(_mm512_maskz_fmadd_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); __m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fmsub_ps(__A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0)); __m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_mask_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} @@ -815,20 +885,45 @@ __m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmsub_ps(__A, __U, __B, __C); } 
+TEST_CONSTEXPR(match_m512(_mm512_mask_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0)); +__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + // CHECK-LABEL: test_mm512_mask3_fmsub_ps + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask3_fmsub_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m512(_mm512_mask3_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0)); 
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0)); __m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_maskz_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> return _mm512_maskz_fmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_fmsub_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0)); +TEST_CONSTEXPR(match_m512(_mm512_maskz_fmsub_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); 
__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fnmadd_ps(__A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0)); +__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + // CHECK-LABEL: test_mm512_mask_fnmadd_ps + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_fnmadd_ps(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m512(_mm512_mask_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 
29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0)); __m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { // CHECK-LABEL: test_mm512_mask3_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} @@ -837,14 +932,18 @@ __m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0)); __m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_maskz_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> 
%{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmadd_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0)); +TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmadd_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); __m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} @@ -852,15 +951,40 @@ __m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fnmsub_ps(__A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, -266.0, -302.0, -340.0, 
-380.0, -422.0, -466.0, -512.0, -560.0)); +__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + // CHECK-LABEL: test_mm512_mask_fnmsub_ps + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_fnmsub_ps(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m512(_mm512_mask_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0)); +__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + // CHECK-LABEL: test_mm512_mask3_fnmsub_ps + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: fneg <16 x float> %{{.*}} + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} 
+ return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0)); +TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0)); __m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_maskz_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmsub_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 
44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0)); +TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmsub_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); __m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_fmaddsub_round_pd // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 @@ -1069,14 +1193,6 @@ __m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, _ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm512_mask3_fmsub_pd - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} - return _mm512_mask3_fmsub_pd(__A, __B, __C, __U); -} __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { // CHECK-LABEL: test_mm512_mask3_fmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} @@ -1085,14 +1201,6 @@ __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mma // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC); } -__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: test_mm512_mask3_fmsub_ps - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - return _mm512_mask3_fmsub_ps(__A, __B, __C, __U); -} __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { // CHECK-LABEL: test_mm512_mask3_fmsubadd_round_pd // CHECK: fneg <8 x double> %{{.*}} @@ -1133,14 +1241,6 @@ __m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: test_mm512_mask_fnmadd_pd - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} - return _mm512_mask_fnmadd_pd(__A, __U, __B, __C); -} __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_mask_fnmadd_round_ps // CHECK: fneg <16 x float> %{{.*}} @@ -1149,14 +1249,6 @@ __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: 
test_mm512_mask_fnmadd_ps - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - return _mm512_mask_fnmadd_ps(__A, __U, __B, __C); -} __m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { // CHECK-LABEL: test_mm512_mask_fnmsub_round_pd // CHECK: fneg <8 x double> @@ -1175,24 +1267,6 @@ __m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: test_mm512_mask_fnmsub_pd - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} - return _mm512_mask_fnmsub_pd(__A, __U, __B, __C); -} -__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm512_mask3_fnmsub_pd - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) - // CHECK: bitcast i8 %{{.*}} to <8 x i1> - // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} - return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U); -} __m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { // CHECK-LABEL: test_mm512_mask_fnmsub_round_ps // CHECK: fneg <16 x float> 
%{{.*}} @@ -1211,24 +1285,6 @@ __m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mm // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: test_mm512_mask_fnmsub_ps - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - return _mm512_mask_fnmsub_ps(__A, __U, __B, __C); -} -__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: test_mm512_mask3_fnmsub_ps - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) - // CHECK: bitcast i16 %{{.*}} to <16 x i1> - // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U); -} __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_cmpeq_epi32_mask @@ -2748,6 +2804,7 @@ __m512i test_mm512_mask_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m5 // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_and_epi32(__src, __k,__a, __b); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_and_epi32((__m512i)(__v16si){1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000}, (__mmask16)0x0001, (__m512i)(__v16si){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}, (__m512i)(__v16si){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}), 3, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)); __m512i test_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_maskz_and_epi32 @@ -2764,6 +2821,7 @@ __m512i test_mm512_mask_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m51 // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} return _mm512_mask_and_epi64(__src, __k,__a, __b); } +TEST_CONSTEXPR(match_m512i(_mm512_mask_and_epi64((__m512i){1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000}, (__mmask8)0x01, (__m512i){7, 7, 7, 7, 7, 7, 7, 7}, (__m512i){3, 3, 3, 3, 3, 3, 3, 3}), 3, 1000, 1000, 1000, 1000, 1000, 1000, 1000)); __m512i test_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_maskz_and_epi64 @@ -3091,6 +3149,7 @@ __m512i test_mm512_maskz_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_mullo_epi32(__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_mullo_epi32(0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_mullo_epi32 @@ -3098,18 +3157,21 @@ __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_mullo_epi32(__src,__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_mullo_epi32((__m512i)(__v16si){-100, +200, -300, +400, -500, +600, -700, +800, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600}, 0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, 
-6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600)); __m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mullo_epi32 //CHECK: mul <16 x i32> return _mm512_mullo_epi32(__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mullo_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, -90, -110, +132, -156, -182, -210, +240, -272)); __m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mullox_epi64 // CHECK: mul <8 x i64> return (__m512i) _mm512_mullox_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mullox_epi64((__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -30, -42, +56, -72)); __m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_mullox_epi64 @@ -3117,6 +3179,7 @@ __m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return (__m512i) _mm512_mask_mullox_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_mullox_epi64((__m512i)(__v8di){-100, +200, -300, +400, -500, +600, -700, +800}, 0x00FF, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -30, -42, +56, -72)); __m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) { // CHECK-LABEL: test_mm512_add_round_pd @@ -4002,7 +4065,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) { // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> return _mm512_cvtepu32_epi64(__X); } - TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 
4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292)); __m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { @@ -4024,7 +4086,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) { // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> return _mm512_cvtepu16_epi32(__A); } - TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524)); __m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { @@ -4046,7 +4107,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) { // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> return _mm512_cvtepu16_epi64(__A); } - TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532)); __m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { @@ -4063,12 +4123,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return _mm512_maskz_cvtepu16_epi64(__U, __A); } - __m512i test_mm512_rol_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_rol_epi32 // CHECK: @llvm.fshl.v16i32 return _mm512_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481)); __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_rol_epi32 @@ -4076,6 +4136,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rol_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, 
-5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481)); __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_rol_epi32 @@ -4083,12 +4144,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0)); __m512i test_mm512_rol_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_rol_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256)); __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_rol_epi64 @@ -4096,6 +4159,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99)); __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_rol_epi64 @@ -4103,12 +4167,14 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0)); __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) 
{ // CHECK-LABEL: test_mm512_rolv_epi32 // CHECK: @llvm.fshl.v16i32 return _mm512_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_rolv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -32769, 786432, -24577, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 60, 2147483640)); __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rolv_epi32 @@ -4116,6 +4182,7 @@ __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rolv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 786432, 999, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 999, 2147483640)); __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rolv_epi32 @@ -4123,12 +4190,14 @@ __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rolv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 0, 786432, 0, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 0, 2147483640)); __m512i 
test_mm512_rolv_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rolv_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_rolv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, -129, 864691128455135232LL, -97, 5764607523034234880LL, -4611686018427387904LL, -25, -15)); __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rolv_epi64 @@ -4136,6 +4205,7 @@ __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rolv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 999, 999, -97, 5764607523034234880LL, 999, 999, 999)); __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rolv_epi64 @@ -4143,12 +4213,14 @@ __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rolv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 0, 0, -97, 5764607523034234880LL, 0, 0, 0)); __m512i test_mm512_ror_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_ror_epi32 // CHECK: @llvm.fshr.v16i32 return _mm512_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 
1744830464, -1744830465, 2013265920, -2013265921)); __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_ror_epi32 @@ -4156,6 +4228,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921)); __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_ror_epi32 @@ -4163,12 +4236,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0)); __m512i test_mm512_ror_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_ror_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL)); __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_ror_epi64 @@ -4176,6 +4251,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // 
CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ror_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99)); __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_ror_epi64 @@ -4183,13 +4259,14 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_ror_epi64(__U, __A, 5); } - +TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0)); __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rorv_epi32 // CHECK: @llvm.fshr.v16i32 return _mm512_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_rorv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -131073, 49152, -1572865, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, -1073741821, -31)); __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rorv_epi32 @@ -4197,6 +4274,7 @@ __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rorv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 
0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 49152, 999, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 999, -31)); __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rorv_epi32 @@ -4204,12 +4282,14 @@ __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rorv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 0, 49152, 0, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 0, -31)); __m512i test_mm512_rorv_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rorv_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_rorv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, -144115188075855873LL, 192, -1729382256910270465LL, 80, 48, 9223372036854775806LL, 9223372036854775804LL)); __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_rorv_epi64 @@ -4217,6 +4297,7 @@ __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rorv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 
2, 1}), 72057594037927936LL, 999, 999, -1729382256910270465LL, 80, 999, 999, 999)); __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_rorv_epi64 @@ -4224,12 +4305,18 @@ __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rorv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, 0, 0, -1729382256910270465LL, 80, 0, 0, 0)); __m512i test_mm512_slli_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_slli_epi32 // CHECK: @llvm.x86.avx512.pslli.d.512 return _mm512_slli_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 0), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), 0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_slli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 33), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_slli_epi32_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_slli_epi32_2 @@ -4243,6 +4330,7 @@ __m512i test_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { 
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_slli_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_slli_epi32((__m512i)(__v16si){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0x5555, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x0, 101, 0x4, 103, 0x8, 105, 0xc, 107, 0x10, 109, 0x14, 111, 0x18, 113, 0x1c, 115)); __m512i test_mm512_mask_slli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_slli_epi32_2 @@ -4257,6 +4345,11 @@ __m512i test_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_slli_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x00ffcc71, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0xffff, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x7fff, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_slli_epi32((__mmask16)0x71cc, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0, 0x4, 0x6, 0, 0, 0xc, 0xe, 0x10, 0, 0, 0, 0x18, 0x1a, 0x1c, 0)); __m512i test_mm512_maskz_slli_epi32_2(__mmask16 __U, __m512i __A, 
unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_slli_epi32_2 @@ -4270,6 +4363,7 @@ __m512i test_mm512_slli_epi64(__m512i __A) { // CHECK: @llvm.x86.avx512.pslli.q.512 return _mm512_slli_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_slli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe)); __m512i test_mm512_slli_epi64_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_slli_epi64_2 @@ -4283,6 +4377,7 @@ __m512i test_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_slli_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_slli_epi64((__m512i)(__v8di){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0x0F, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 2), 0x0, 0x4, 0x8, 0xc, 104, 105, 106, 107)); __m512i test_mm512_mask_slli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_slli_epi64_2 @@ -4297,6 +4392,11 @@ __m512i test_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_slli_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x00ffcc71, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 64), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0xff, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x7f, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_slli_epi64((__mmask8)0x71, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0, 0, 0, 0x8, 0xa, 0xc, 0)); 
__m512i test_mm512_maskz_slli_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_slli_epi64_2 @@ -4310,6 +4410,7 @@ __m512i test_mm512_srli_epi32(__m512i __A) { // CHECK: @llvm.x86.avx512.psrli.d.512 return _mm512_srli_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_srli_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), 0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_srli_epi32_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srli_epi32_2 @@ -4323,6 +4424,7 @@ __m512i test_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srli_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srli_epi32((__m512i)(__v16si){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0x5555, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x0, 101, 0x1, 103, 0x2, 105, 0x3, 107, 0x4, 109, 0x5, 111, 0x6, 113, 0x7, 115)); __m512i test_mm512_mask_srli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srli_epi32_2 @@ -4337,6 +4439,7 @@ __m512i test_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srli_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srli_epi32((__mmask16)0x71cc, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0, 0x1, 0x1, 0, 0, 0x3, 0x3, 0x4, 0, 0, 0, 0x6, 0x6, 0x7, 0)); __m512i test_mm512_maskz_srli_epi32_2(__mmask16 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_srli_epi32_2 @@ -4350,6 +4453,7 @@ __m512i test_mm512_srli_epi64(__m512i __A) { // CHECK: @llvm.x86.avx512.psrli.q.512 return _mm512_srli_epi64(__A, 5); } 
+TEST_CONSTEXPR(match_v8di(_mm512_srli_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3)); __m512i test_mm512_srli_epi64_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srli_epi64_2 @@ -4363,6 +4467,7 @@ __m512i test_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srli_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srli_epi64((__m512i)(__v8di){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0x0F, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 2), 0x0, 0x0, 0x0, 0x0, 104, 105, 106, 107)); __m512i test_mm512_mask_srli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srli_epi64_2 @@ -4377,6 +4482,7 @@ __m512i test_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srli_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srli_epi64((__mmask8)0x71, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0)); __m512i test_mm512_maskz_srli_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_srli_epi64_2 @@ -5756,6 +5862,7 @@ __m512i test_mm512_srai_epi32(__m512i __A) { // CHECK: @llvm.x86.avx512.psrai.d.512 return _mm512_srai_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_srai_epi32((__m512i)(__v16si){0, -2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 10), 0, -1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_srai_epi32_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srai_epi32_2 @@ -5769,6 +5876,7 @@ __m512i test_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srai_epi32(__W, __U, __A, 5); } 
+TEST_CONSTEXPR(match_v16si(_mm512_mask_srai_epi32((__m512i)(__v16si){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0x5555, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x0, 101, 0x1, 103, 0x2, 105, 0x3, 107, 0x4, 109, 0x5, 111, 0x6, 113, 0x7, 115)); __m512i test_mm512_mask_srai_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srai_epi32_2 @@ -5783,6 +5891,7 @@ __m512i test_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srai_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srai_epi32((__mmask16)0x71cc, (__m512i)(__v16si){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0, 0, 0x1, 0x1, 0, 0, 0x3, 0x3, 0x4, 0, 0, 0, 0x6, 0x6, 0x7, 0)); __m512i test_mm512_maskz_srai_epi32_2(__mmask16 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_srai_epi32_2 @@ -5796,6 +5905,7 @@ __m512i test_mm512_srai_epi64(__m512i __A) { // CHECK: @llvm.x86.avx512.psrai.q.512 return _mm512_srai_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_srai_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3)); __m512i test_mm512_srai_epi64_2(__m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_srai_epi64_2 @@ -5809,6 +5919,7 @@ __m512i test_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srai_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srai_epi64((__m512i)(__v8di){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0x0F, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, 2), 0x0, 0x0, 0x0, 0x0, 104, 105, 106, 107)); __m512i test_mm512_mask_srai_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_mask_srai_epi64_2 @@ 
-5823,6 +5934,7 @@ __m512i test_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srai_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srai_epi64((__mmask8)0x71, (__m512i)(__v8di){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0)); __m512i test_mm512_maskz_srai_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { // CHECK-LABEL: test_mm512_maskz_srai_epi64_2 @@ -5876,6 +5988,7 @@ __m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psllv.d.512 return _mm512_sllv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_sllv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 536870912, 0, -2147483648, 0, 80, -48, 28, -16)); __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_sllv_epi32 @@ -5883,6 +5996,7 @@ __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sllv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_sllv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -8, 99, -64, 99, 0, 99, 99, 536870912, 99, 99, 99, 80, 99, 99, -16)); __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_sllv_epi32 @@ -5890,12 +6004,14 @@ __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sllv_epi32(__U, 
__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_sllv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 0, 0, -2147483648, 0, 80, -48, 0, 0)); __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_sllv_epi64 // CHECK: @llvm.x86.avx512.psllv.q.512 return _mm512_sllv_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_sllv_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 32, -68, 0, 0, 0, 1344, 0, 0)); __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_sllv_epi64 @@ -5903,6 +6019,7 @@ __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sllv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_sllv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 1344, 0, 0)); __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_sllv_epi64 @@ -5910,6 +6027,7 @@ __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sllv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_sllv_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 0, 0, 0, 0, 0, 1344, 0, 0)); __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_sra_epi32 @@ -5956,6 +6074,7 @@ __m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) { // CHECK: 
@llvm.x86.avx512.psrav.d.512 return _mm512_srav_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srav_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -4)); __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srav_epi32 @@ -5963,6 +6082,7 @@ __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srav_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srav_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -1, 99, -1, 99, -1, 99, 99, 0, 99, 99, 99, 0, 99, 99, -4)); __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srav_epi32 @@ -5970,12 +6090,14 @@ __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srav_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srav_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, 0)); __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.512 return _mm512_srav_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_srav_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 8, 
-5, 0, -1, 0, 0, 0, 0)); __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srav_epi64 @@ -5983,6 +6105,7 @@ __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srav_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 0, 0, 0)); __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srav_epi64 @@ -5990,6 +6113,7 @@ __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srav_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_srl_epi32 @@ -6036,6 +6160,7 @@ __m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrlv.d.512 return _mm512_srlv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srlv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7, 0, 0, 0, 0, 0, 536870911, 1, 2147483644)); __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srlv_epi32 @@ -6043,6 +6168,7 @@ __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, 
<16 x i32> %{{.*}} return _mm512_mask_srlv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srlv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, 1073741823, 99, 268435455, 99, 1, 99, 99, 0, 99, 99, 99, 0, 99, 99, 2147483644)); __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srlv_epi32 @@ -6050,12 +6176,14 @@ __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srlv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srlv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 0, 0, 7, 0, 0, 0, 0, 0, 536870911, 0, 0)); __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srlv_epi64 // CHECK: @llvm.x86.avx512.psrlv.q.512 return _mm512_srlv_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_srlv_epi64((__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 8, 4, 2305843009213693949, 1, 0, 0, 0, 72057594037927935)); __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srlv_epi64 @@ -6063,6 +6191,7 @@ __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srlv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_srlv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 
22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 999, 999, 999, 1, 999, 0, 999, 999)); __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srlv_epi64 @@ -6070,6 +6199,7 @@ __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srlv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_srlv_epi64(0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 0, 0, 0, 1, 0, 0, 0, 0)); __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_ternarylogic_epi32 @@ -8934,6 +9064,8 @@ __m512d test_mm512_cvtps_pd(__m256 __A) { return _mm512_cvtps_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtps_pd((__m256){0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f}), 0.0, 1.0, 2.0, 4.0, -8.0, -16.0, -32.0, -64.0)); + __m512d test_mm512_cvtpslo_pd(__m512 __A) { // CHECK-LABEL: test_mm512_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -8941,6 +9073,8 @@ __m512d test_mm512_cvtpslo_pd(__m512 __A) { return _mm512_cvtpslo_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtpslo_pd((__m512){0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f, -128.0f, -256.0f, -512.0f, -1024.0f, -2048.0f, -4096.0f, -8192.0f, -16384.0f}), 0.0, 1.0, 2.0, 4.0, -8.0, -16.0, -32.0, -64.0)); + __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm512_mask_cvtps_pd // CHECK: fpext <8 x float> %{{.*}} to <8 x double> @@ -8948,6 +9082,8 @@ __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { return _mm512_mask_cvtps_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtps_pd(_mm512_set1_pd(-777.0), /*1010 1101=*/0xad, (__m256){0.0f, 1.0f, 2.0f, 4.0f, 
-8.0f, -16.0f, -32.0f, -64.0f}), 0.0, -777.0, 2.0, 4.0, -777.0, -16.0, -777.0, -64.0)); + __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -8956,6 +9092,9 @@ __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { return _mm512_mask_cvtpslo_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtpslo_pd(_mm512_set1_pd(-777.0), /*1010 1101=*/0xad, (__m512){0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f, -128.0f, -256.0f, -512.0f, -1024.0f, -2048.0f, -4096.0f, -8192.0f, -16384.0f}), 0.0, -777.0, 2.0, 4.0, -777.0, -16.0, -777.0, -64.0)); + + __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm512_maskz_cvtps_pd // CHECK: fpext <8 x float> %{{.*}} to <8 x double> @@ -8963,6 +9102,8 @@ __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { return _mm512_maskz_cvtps_pd(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtps_pd(/*1010 1101=*/0xad, (__m256){0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f}), 0.0, 0.0, 2.0, 4.0, 0.0, -16.0, 0.0, -64.0)); + __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} @@ -9238,6 +9379,8 @@ __m256 test_mm512_castps512_ps256 (__m512 __A) return _mm512_castps512_ps256 (__A); } +TEST_CONSTEXPR(match_m256(_mm512_castps512_ps256((__m512){0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f, -128.0f, -256.0f, -512.0f, -1024.0f, -2048.0f, -4096.0f, -8192.0f, -16384.0f}), 0.0f, 1.0f, 2.0f, 4.0f, -8.0f, -16.0f, -32.0f, -64.0f)); + __m512i test_mm512_castps_si512 (__m512 __A) { // CHECK-LABEL: test_mm512_castps_si512 @@ -9290,6 +9433,8 @@ __m256i test_mm512_castsi512_si256 (__m512i __A) return _mm512_castsi512_si256 (__A); 
} +TEST_CONSTEXPR(match_v8si(_mm512_castsi512_si256((__m512i)(__v16si){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0, 1, 2, 4, 8, 16, 32, 64)); + __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { // CHECK-LABEL: test_mm_cvt_roundsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round @@ -9437,6 +9582,8 @@ __m512 test_mm512_cvtepu32_ps (__m512i __A) return _mm512_cvtepu32_ps (__A); } +TEST_CONSTEXPR(match_m512(_mm512_cvtepu32_ps((__m512i)(__v16su){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0.0f, 1.0f, 2.0f, 4.0f, 8.0f, 16.0f, 32.0f, 64.0f, 128.0f, 256.0f, 512.0f, 1024.0f, 2048.0f, 4096.0f, 8192.0f, 16384.0f)); + __m512 test_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu32_ps @@ -9445,6 +9592,8 @@ __m512 test_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) return _mm512_mask_cvtepu32_ps (__W,__U,__A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtepu32_ps(_mm512_set1_ps(-777.0f), /*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16su){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0.0f, -777.0f, 2.0f, 4.0f, -777.0f, 16.0f, -777.0f, 64.0f, -777.0f, -777.0f, 512.0f, 1024.0f, -777.0f, 4096.0f, -777.0f, 16384.0f)); + __m512 test_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepu32_ps @@ -9453,6 +9602,8 @@ __m512 test_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) return _mm512_maskz_cvtepu32_ps (__U,__A); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_cvtepu32_ps(/*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16su){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, 16.0f, 0.0f, 64.0f, 0.0f, 0.0f, 512.0f, 1024.0f, 0.0f, 4096.0f, 0.0f, 16384.0f)); + __m512d test_mm512_cvtepi32_pd (__m256i __A) { // CHECK-LABEL: test_mm512_cvtepi32_pd @@ -9460,6 +9611,8 @@ __m512d test_mm512_cvtepi32_pd (__m256i __A) return 
_mm512_cvtepi32_pd (__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepi32_pd((__m256i)(__v8si){-8, -4, -2, -1, 0, 1, 2, 4}), -8.0, -4.0, -2.0, -1.0, 0.0, 1.0, 2.0, 4.0)); + __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi32_pd @@ -9468,6 +9621,8 @@ __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) return _mm512_mask_cvtepi32_pd (__W,__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepi32_pd(_mm512_set1_pd(-777.0), /*0101 1100=*/0x5c, (__m256i)(__v8si){-8, -4, -2, -1, 0, 1, 2, 4}), -777.0, -777.0, -2.0, -1.0, 0.0, -777.0, 2.0, -777.0)); + __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepi32_pd @@ -9476,6 +9631,8 @@ __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) return _mm512_maskz_cvtepi32_pd (__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepi32_pd(/*0101 1100=*/0x5c, (__m256i)(__v8si){-8, -4, -2, -1, 0, 1, 2, 4}), 0.0, 0.0, -2.0, -1.0, 0.0, 0.0, 2.0, 0.0)); + __m512d test_mm512_cvtepi32lo_pd (__m512i __A) { // CHECK-LABEL: test_mm512_cvtepi32lo_pd @@ -9484,6 +9641,8 @@ __m512d test_mm512_cvtepi32lo_pd (__m512i __A) return _mm512_cvtepi32lo_pd (__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepi32lo_pd((__m512i)(__v16si){-128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64}), -128.0, -64.0, -32.0, -16.0, -8.0, -4.0, -2.0, -1.0)); + __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi32lo_pd @@ -9493,6 +9652,8 @@ __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) return _mm512_mask_cvtepi32lo_pd (__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepi32lo_pd(_mm512_set1_pd(-777.0), /*1010 1101=*/0xad, (__m512i)(__v16si){-128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64}), -128.0, -777.0, -32.0, -16.0, -777.0, -4.0, -777.0, -1.0)); + __m512 test_mm512_cvtepi32_ps 
(__m512i __A) { // CHECK-LABEL: test_mm512_cvtepi32_ps @@ -9500,6 +9661,8 @@ __m512 test_mm512_cvtepi32_ps (__m512i __A) return _mm512_cvtepi32_ps (__A); } +TEST_CONSTEXPR(match_m512(_mm512_cvtepi32_ps((__m512i)(__v16si){-128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64}), -128.0f, -64.0f, -32.0f, -16.0f, -8.0f, -4.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 4.0f, 8.0f, 16.0f, 32.0f, 64.0f)); + __m512 test_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi32_ps @@ -9508,6 +9671,8 @@ __m512 test_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) return _mm512_mask_cvtepi32_ps (__W,__U,__A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_cvtepi32_ps(_mm512_set1_ps(-777.0f), /*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){-128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64}), -128.0f, -777.0f, -32.0f, -16.0f, -777.0f, -4.0f, -777.0f, -1.0f, -777.0f, -777.0f, 2.0f, 4.0f, -777.0f, 16.0f, -777.0f, 64.0f)); + __m512 test_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepi32_ps @@ -9516,6 +9681,8 @@ __m512 test_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) return _mm512_maskz_cvtepi32_ps (__U,__A); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_cvtepi32_ps(/*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){-128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64}), -128.0f, 0.0f, -32.0f, -16.0f, 0.0f, -4.0f, 0.0f, -1.0f, 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, 16.0f, 0.0f, 64.0f)); + __m512d test_mm512_cvtepu32_pd(__m256i __A) { // CHECK-LABEL: test_mm512_cvtepu32_pd @@ -9523,6 +9690,8 @@ __m512d test_mm512_cvtepu32_pd(__m256i __A) return _mm512_cvtepu32_pd(__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepu32_pd((__m256i)(__v8su){0, 1, 2, 4, 8, 16, 32, 64}), 0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0)); + __m512d test_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu32_pd @@ -9531,6 +9700,8 @@ __m512d 
test_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) return _mm512_mask_cvtepu32_pd (__W,__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepu32_pd(_mm512_set1_pd(-777.0), /*0101 1100=*/0x5c, (__m256i)(__v8su){0, 1, 2, 4, 8, 16, 32, 64}), -777.0, -777.0, 2.0, 4.0, 8.0, -777.0, 32.0, -777.0)); + __m512d test_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm512_maskz_cvtepu32_pd @@ -9539,6 +9710,8 @@ __m512d test_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) return _mm512_maskz_cvtepu32_pd (__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepu32_pd(/*0101 1100=*/0x5c, (__m256i)(__v8su){0, 1, 2, 4, 8, 16, 32, 64}), 0.0, 0.0, 2.0, 4.0, 8.0, 0.0, 32.0, 0.0)); + __m512d test_mm512_cvtepu32lo_pd (__m512i __A) { // CHECK-LABEL: test_mm512_cvtepu32lo_pd @@ -9547,6 +9720,9 @@ __m512d test_mm512_cvtepu32lo_pd (__m512i __A) return _mm512_cvtepu32lo_pd (__A); } +TEST_CONSTEXPR(match_m512d(_mm512_cvtepu32lo_pd((__m512i)(__v16su){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0)); + + __m512d test_mm512_mask_cvtepu32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepu32lo_pd @@ -9556,6 +9732,8 @@ __m512d test_mm512_mask_cvtepu32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) return _mm512_mask_cvtepu32lo_pd (__W, __U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepu32lo_pd(_mm512_set1_pd(-777.0), /*1010 1101=*/0xad, (__m512i)(__v16su){0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}), 0.0, -777.0, 2.0, 4.0, -777.0, 16.0, -777.0, 64.0)); + __m256 test_mm512_cvtpd_ps (__m512d __A) { // CHECK-LABEL: test_mm512_cvtpd_ps @@ -10020,6 +10198,8 @@ __m512i test_mm512_max_epi32 (__m512i __A, __m512i __B) return _mm512_max_epi32 (__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_max_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, 
-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16)); + __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epi32 @@ -10028,6 +10208,8 @@ __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 return _mm512_mask_max_epi32 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_max_epi32((__m512i)(__v16si){-100, +200, -300, +400, -500, +600, -700, +800, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600}, 0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600)); + __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epi32 @@ -10036,6 +10218,8 @@ __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) return _mm512_maskz_max_epi32 (__M,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_max_epi32(0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epi64 @@ -10043,6 +10227,8 @@ __m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) return _mm512_max_epi64 (__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_max_epi64((__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, +5, +6, +7, +8)); + __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epi64 @@ -10051,6 
+10237,8 @@ __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 return _mm512_mask_max_epi64 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_max_epi64((__m512i)(__v8di){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, -500, +600, -700, +800)); + __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epi64 @@ -10059,6 +10247,8 @@ __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) return _mm512_maskz_max_epi64 (__M,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_max_epi64(0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, 0, 0, 0, 0)); + __m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epu64 @@ -10066,6 +10256,8 @@ __m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) return _mm512_max_epu64 (__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_max_epu64((__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 4, 3, 3, 4, 8, 7, 7, 8)); + __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epu64 @@ -10074,6 +10266,8 @@ __m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 return _mm512_mask_max_epu64 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_mask_max_epu64((__m512i)(__v8du){100, 200, 300, 400, 500, 600, 700, 800}, 0x0F, (__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 4, 3, 3, 4, 500, 600, 700, 800)); + __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epu64 @@ -10082,6 +10276,8 @@ __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) return 
_mm512_maskz_max_epu64 (__M,__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_maskz_max_epu64(0x0F, (__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 4, 3, 3, 4, 0, 0, 0, 0)); + __m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epu32 @@ -10089,6 +10285,8 @@ __m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) return _mm512_max_epu32 (__A,__B); } +TEST_CONSTEXPR(match_v16su(_mm512_max_epu32((__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); + __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_max_epu32 @@ -10097,6 +10295,8 @@ __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 return _mm512_mask_max_epu32 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v16su(_mm512_mask_max_epu32((__m512i)(__v16su){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, 0x00FF, (__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600)); + __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_max_epu32 @@ -10105,6 +10305,8 @@ __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) return _mm512_maskz_max_epu32 (__M,__A,__B); } +TEST_CONSTEXPR(match_v16su(_mm512_maskz_max_epu32(0x00FF, (__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epi32 @@ -10112,6 
+10314,8 @@ __m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) return _mm512_min_epi32 (__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_min_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16)); + __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epi32 @@ -10120,6 +10324,8 @@ __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 return _mm512_mask_min_epi32 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_min_epi32((__m512i)(__v16si){-100, +200, -300, +400, -500, +600, -700, +800, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600}, 0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600)); + __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epi32 @@ -10128,6 +10334,8 @@ __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) return _mm512_maskz_min_epi32 (__M,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_min_epi32(0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epu32 @@ -10135,6 +10343,8 @@ __m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) return _mm512_min_epu32 (__A,__B); } 
+TEST_CONSTEXPR(match_v16su(_mm512_min_epu32((__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); + __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epu32 @@ -10143,6 +10353,8 @@ __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m5 return _mm512_mask_min_epu32 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v16su(_mm512_mask_min_epu32((__m512i)(__v16su){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, 0x00FF, (__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600)); + __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epu32 @@ -10151,6 +10363,8 @@ __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) return _mm512_maskz_min_epu32 (__M,__A,__B); } +TEST_CONSTEXPR(match_v16su(_mm512_maskz_min_epu32(0x00FF, (__m512i)(__v16su){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epi64 @@ -10158,6 +10372,8 @@ __m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) return _mm512_min_epi64 (__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_min_epi64((__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, -5, -6, -7, -8)); + __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epi64 @@ 
-10166,6 +10382,8 @@ __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 return _mm512_mask_min_epi64 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_min_epi64((__m512i)(__v8di){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, -500, +600, -700, +800)); + __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epi64 @@ -10174,6 +10392,8 @@ __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) return _mm512_maskz_min_epi64 (__M,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_min_epi64(0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, 0, 0, 0, 0)); + __m512i test_mm512_min_epu64 (__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_min_epu64 @@ -10181,6 +10401,8 @@ __m512i test_mm512_min_epu64 (__m512i __A, __m512i __B) return _mm512_min_epu64 (__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_min_epu64((__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 1, 2, 2, 1, 5, 6, 6, 5)); + __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_min_epu64 @@ -10189,6 +10411,8 @@ __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m51 return _mm512_mask_min_epu64 (__W,__M,__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_mask_min_epu64((__m512i)(__v8du){100, 200, 300, 400, 500, 600, 700, 800}, 0x0F, (__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 1, 2, 2, 1, 500, 600, 700, 800)); + __m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_min_epu64 @@ -10197,6 +10421,8 @@ __m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 
return _mm512_maskz_min_epu64 (__M,__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_maskz_min_epu64(0x0F, (__m512i)(__v8du){1, 2, 3, 4, 8, 7, 6, 5}, (__m512i)(__v8du){4, 3, 2, 1, 5, 6, 7, 8}), 1, 2, 2, 1, 0, 0, 0, 0)); + __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) { // CHECK-LABEL: test_mm512_mask_set1_epi32 @@ -10587,6 +10813,8 @@ __m512 test_mm512_set_ps (float __A, float __B, float __C, float __D, } TEST_CONSTEXPR(match_m512(_mm512_set_ps(-16.0f, 15.0f, -14.0f, 13.0f, -12.0f, 11.0f, -10.0f, 9.0f, -8.0f, 7.0f, -6.0f, 5.0f, -4.0f, 3.0f, -2.0f, 1.0f), 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f, 9.0f, -10.0f, 11.0f, -12.0f, 13.0f, -14.0f, 15.0f, -16.0f)); +TEST_CONSTEXPR(match_v8di(_mm512_abs_epi64((__m512i)(__v8di){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 2, 2, 2, 2, 2, 2, 2)); + __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_abs_epi64 @@ -10594,6 +10822,7 @@ __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_mask_abs_epi64 (__W,__U,__A); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_abs_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask8)0x01, (__m512i)(__v8di){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99)); __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { @@ -10602,6 +10831,9 @@ __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_maskz_abs_epi64 (__U,__A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_abs_epi64((__mmask8)0x01, (__m512i)(__v8di){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 0, 0, 0, 0, 0, 0, 0)); + +TEST_CONSTEXPR(match_v16si(_mm512_abs_epi32((__m512i)(__v16si){-1, 2, 2, 2, 2, 2, 2, 2, -1, 2, 2, 2, 2, 2, 2, 2}), 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2)); __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { @@ 
-10612,6 +10844,7 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_abs_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x0001, (__m512i)(__v16si){-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { @@ -10622,6 +10855,7 @@ __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_abs_epi32((__mmask16)0x0001, (__m512i)(__v16si){-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512 test_mm512_setr_ps (float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, @@ -10907,6 +11141,7 @@ __m512d test_mm512_abs_pd(__m512d a){ // CHECK: and <8 x i64> return _mm512_abs_pd(a); } +TEST_CONSTEXPR(match_m512d(_mm512_abs_pd((__m512d){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)); __m512d test_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A){ // CHECK-LABEL: test_mm512_mask_abs_pd @@ -10915,12 +11150,14 @@ __m512d test_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A){ // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} return _mm512_mask_abs_pd (__W,__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_abs_pd((__m512d){99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0}, (__mmask8)0x01, (__m512d){-1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 1.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0)); __m512 test_mm512_abs_ps(__m512 a){ // CHECK-LABEL: test_mm512_abs_ps // CHECK: and <16 x i32> return 
_mm512_abs_ps(a); } +TEST_CONSTEXPR(match_m512(_mm512_abs_ps((__m512){-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, 6.0f, -7.0f, 8.0f, -9.0f, 10.0f, -11.0f, 12.0f, -13.0f, 14.0f, -15.0f, -16.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f)); __m512 test_mm512_mask_abs_ps(__m512 __W, __mmask16 __U, __m512 __A){ // CHECK-LABEL: test_mm512_mask_abs_ps @@ -10929,6 +11166,7 @@ __m512 test_mm512_mask_abs_ps(__m512 __W, __mmask16 __U, __m512 __A){ // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_abs_ps( __W, __U, __A); } +TEST_CONSTEXPR(match_m512(_mm512_mask_abs_ps((__m512){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x0001, (__m512){-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m512d test_mm512_zextpd128_pd512(__m128d A) { // CHECK-LABEL: test_mm512_zextpd128_pd512 @@ -10936,6 +11174,7 @@ __m512d test_mm512_zextpd128_pd512(__m128d A) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> return _mm512_zextpd128_pd512(A); } +TEST_CONSTEXPR(match_m512d(_mm512_zextpd128_pd512((__m128d){+99.0, -1.0}), +99.0, -1.0, +0.0, +0.0, 0.0, 0.0, 0.0, 0.0)); __m512d test_mm512_zextpd256_pd512(__m256d A) { // CHECK-LABEL: test_mm512_zextpd256_pd512 @@ -10943,6 +11182,7 @@ __m512d test_mm512_zextpd256_pd512(__m256d A) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm512_zextpd256_pd512(A); } +TEST_CONSTEXPR(match_m512d(_mm512_zextpd256_pd512((__m256d){1.0, 2.0, 3.0, 4.0}), 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0)); __m512 test_mm512_zextps128_ps512(__m128 A) { // CHECK-LABEL: test_mm512_zextps128_ps512 @@ -10950,6 +11190,7 @@ __m512 test_mm512_zextps128_ps512(__m128 A) { // CHECK: shufflevector <4 x float> %{{.*}}, 
<4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> return _mm512_zextps128_ps512(A); } +TEST_CONSTEXPR(match_m512(_mm512_zextps128_ps512((__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m512 test_mm512_zextps256_ps512(__m256 A) { // CHECK-LABEL: test_mm512_zextps256_ps512 @@ -10957,6 +11198,7 @@ __m512 test_mm512_zextps256_ps512(__m256 A) { // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_zextps256_ps512(A); } +TEST_CONSTEXPR(match_m512(_mm512_zextps256_ps512((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m512i test_mm512_zextsi128_si512(__m128i A) { // CHECK-LABEL: test_mm512_zextsi128_si512 @@ -10964,6 +11206,7 @@ __m512i test_mm512_zextsi128_si512(__m128i A) { // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> return _mm512_zextsi128_si512(A); } +TEST_CONSTEXPR(match_m512i(_mm512_zextsi128_si512((__m128i){1, 2}), 1, 2, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_zextsi256_si512(__m256i A) { // CHECK-LABEL: test_mm512_zextsi256_si512 @@ -10971,6 +11214,7 @@ __m512i test_mm512_zextsi256_si512(__m256i A) { // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> return _mm512_zextsi256_si512(A); } +TEST_CONSTEXPR(match_m512i(_mm512_zextsi256_si512((__m256i){1, 2, 3, 4}), 1, 2, 3, 4, 0, 0, 0, 0)); __m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) { // CHECK-LABEL: test_mm512_i32logather_pd diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c 
b/clang/test/CodeGen/X86/avx512fp16-builtins.c index 3fcddb02d16c..37443d584614 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -3,6 +3,12 @@ // RUN: %clang_cc1 -x c++ -ffreestanding -flax-vector-conversions=none %s -triple=x86_64-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 // RUN: %clang_cc1 -x c++ -ffreestanding -flax-vector-conversions=none %s -triple=i686-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c -ffreestanding -flax-vector-conversions=none %s -triple=x86_64-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding -flax-vector-conversions=none %s -triple=i686-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding -flax-vector-conversions=none %s -triple=x86_64-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding -flax-vector-conversions=none %s -triple=i686-unknown-unknown -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK + + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -376,18 +382,21 @@ __m256h test_mm256_zextph128_ph256(__m128h __a) { // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm256_zextph128_ph256(__a); } 
+TEST_CONSTEXPR(match_m256h(_mm256_zextph128_ph256((__m128h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16)); __m512h test_mm512_zextph128_ph512(__m128h __a) { // CHECK-LABEL: test_mm512_zextph128_ph512 // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_zextph128_ph512(__a); } +TEST_CONSTEXPR(match_m512h(_mm512_zextph128_ph512((__m128h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16)); __m512h test_mm512_zextph256_ph512(__m256h __a) { // CHECK-LABEL: test_mm512_zextph256_ph512 // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> {{.*}}, <32 x i32> return _mm512_zextph256_ph512(__a); } +TEST_CONSTEXPR(match_m512h(_mm512_zextph256_ph512((__m256h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16)); int test_mm_comi_round_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_comi_round_sh @@ -709,6 +718,7 @@ __m512h test_mm512_abs_ph(__m512h a) { // CHECK: and <16 x i32> return _mm512_abs_ph(a); } 
+TEST_CONSTEXPR(match_m512h(_mm512_abs_ph((__m512h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0, -17.0, 18.0, -19.0, 20.0, -21.0, 22.0, -23.0, 24.0, -25.0, 26.0, -27.0, 28.0, -29.0, 30.0, -31.0, 32.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0)); __m512h test_mm512_conj_pch(__m512h __A) { // CHECK-LABEL: test_mm512_conj_pch diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index 4f6139b81960..560035598a6e 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { // CHECK-LABEL: test_mm512_mask_compress_epi16 @@ -95,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999)); __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi64 @@ -102,12 +104,14 @@ __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} 
return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0)); __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi64 // CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL)); __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi32 @@ -115,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999)); __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi32 @@ -122,12 +127,14 @@ __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x 
i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0)); __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256)); __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi16 @@ -135,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 999, -8321, 999, 8704, -8832, 999, 
-8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999)); __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi16 @@ -142,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0)); __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 11903, -11777, -11905, 12160)); __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: 
test_mm512_mask_shrdi_epi64 @@ -155,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 16384, 32767, 999, 999, 999, -49153, 999)); __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi64 @@ -162,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 16384, 32767, 0, 0, 0, -49153, 0)); __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi64 // CHECK: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shrdi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), -8192, 16384, 32767, -32768, -32769, 49152, -49153, -65536)); __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi32 @@ -175,6 +188,7 @@ __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7); } 
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 999, 999, -8388608, 999, -10485761, -12582913, -16777216, 999, -20971520, 999, 999, 999, -29360128, -29360129, 999)); __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi32 @@ -182,12 +196,14 @@ __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 0, 0, -8388608, 0, -10485761, -12582913, -16777216, 0, -20971520, 0, 0, 0, -29360128, -29360129, 0)); __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi32 // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shrdi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 4194304, 8388607, -8388608, 10485760, -10485761, -12582913, -16777216, -16777217, -20971520, -23068672, -23068673, -25165825, -29360128, -29360129, 35651583)); __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi16 @@ -195,6 
+211,7 @@ __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 999, -1025, 999, 2560, 3583, 999, -3585, 4608, 5120, 999, 999, 7167, -7168, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 12288, 999, 999, 999, -14336, -14848, 999, -15361, 999)); __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi16 @@ -202,12 +219,14 @@ __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 0, -1025, 0, 2560, 3583, 0, -3585, 4608, 5120, 0, 0, 7167, -7168, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 12288, 0, 0, 0, -14336, -14848, 0, -15361, 0)); __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi16 // CHECK: call <32 x 
i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31)) return _mm512_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v32hi(_mm512_shrdi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 1024, -1025, 2048, 2560, 3583, 3584, -3585, 4608, 5120, 6143, -6144, 7167, -7168, -7680, 8703, 9215, -8705, -9728, -9729, 10752, -11264, 11776, 12288, -12289, 13823, -13824, -14336, -14848, -14849, -15361, 16384)); __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi64 @@ -215,6 +234,7 @@ __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, 0xC1, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 17, -18, 19, -20, 21, 91, -9223372036854775804LL)); __m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi64 @@ -222,12 +242,14 @@ __m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldv_epi64(0xC1, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 0, 0, 0, 0, 0, 91, 
-9223372036854775804LL)); __m512i test_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_shldv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1080863910568919041LL, 2176, -5188146770730811392LL, 639, -3458764513820540929LL, -6917529027641081856LL, 91, -9223372036854775804LL)); __m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi32 @@ -235,6 +257,7 @@ __m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, 0x26D8, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 32, -33, 34, 18874367, 37748736, 37, -159383552, 327155712, -40, -5248, -1476395008, 43, 44, 360, 46, -47)); __m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi32 @@ -242,12 +265,14 @@ __m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldv_epi32(0x26D8, (__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, 
-13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 0, 0, 0, 18874367, 37748736, 0, -159383552, 327155712, 0, -5248, -1476395008, 0, 0, 360, 0, 0)); __m512i test_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_shldv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_shldv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 2097152, -4325376, 573439, 18874367, 37748736, 77823, -159383552, 327155712, -10240, -5248, -1476395008, 1376, 719, 360, -1073741828, -2147483640)); __m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi16 @@ -255,6 +280,7 @@ __m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, 0x73314D8, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 65, 66, 536, 1088, -69, 7167, 29184, -72, 73, 10240, 75, -1216, -77, -78, -79, -80, -162, 82, -83, 16385, 2751, 86, 87, -22528, 11519, 5760, -91, 92, 93, 94, 95)); __m512i 
test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldv_epi16 @@ -262,12 +288,14 @@ __m512i test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldv_epi16(0x73314D8, (__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), 0, 0, 0, 536, 1088, 0, 7167, 29184, 0, 0, 10240, 0, -1216, 0, 0, 0, -80, -162, 0, 0, 16385, 2751, 0, 0, -22528, 11519, 5760, 0, 0, 0, 0, 0)); __m512i test_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldv_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 130, 267, 536, 1088, -8193, 7167, 29184, -18432, -27649, 10240, 2400, -1216, -609, -312, -32760, -80, -162, -32764, -24574, 16385, 2751, 5567, 11136, -22528, 11519, 5760, 
10240, -12290, 751, 379, -16)); __m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi64 @@ -275,6 +303,7 @@ __m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, 0xC1, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 17, -18, 19, -20, 21, 4611686018427387909LL, 17)); __m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi64 @@ -282,12 +311,14 @@ __m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdv_epi64(0xC1, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 0, 0, 0, 0, 0, 4611686018427387909LL, 17)); __m512i test_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_shrdv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_shrdv_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, -23}, (__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, 8}, (__m512i)(__v8di){ -8, 7, -6, 5, -4, -3, 2, -1}), -1, 288230376151711744LL, 255, -2305843009213693952LL, -65, 48, 4611686018427387909LL, 17)); __m512i test_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi32 @@ -295,6 +326,7 @@ 
__m512i test_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, 0x26D8, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 32, -33, 34, -32768, 20480, 37, 8191, 4096, -40, 369098751, 704, 43, 44, -1073741819, 46, -47)); __m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi32 @@ -302,12 +334,14 @@ __m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdv_epi32(0x26D8, (__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 0, 0, 0, -32768, 20480, 0, 8191, 4096, 0, 369098751, 704, 0, 0, -1073741819, 0, 0)); __m512i test_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi32 // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si(_mm512_shrdv_epi32((__m512i)(__v16si){ 32, -33, 34, 35, 36, 37, -38, 39, -40, -41, 42, 43, 44, 45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, 5, -6, 7, 8, 9, 10, 11, 12, -13, 14, -15, 16}, (__m512i)(__v16si){ 16, -15, 14, -13, -12, 11, -10, -9, 8, 7, -6, 5, 4, 3, -2, -1}), 65536, 98303, -786432, -32768, 20480, 
-12582912, 8191, 4096, 167772159, 369098751, 704, 1610612737, 805306370, -1073741819, -60, 33)); __m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdv_epi16 @@ -315,6 +349,7 @@ __m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, 0x73314D8, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 65, 66, -32760, 20484, -69, -448, 1151, -72, 73, 704, 75, -8197, -77, -78, -79, -80, 32727, 82, -83, 336, 20482, 86, 87, 6655, -13312, 27649, -91, 92, 93, 94, 95)); __m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdv_epi16 @@ -322,10 +357,12 @@ __m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdv_epi16(0x73314D8, (__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 
19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), 0, 0, 0, -32760, 20484, 0, -448, 1151, 0, 0, 704, 0, -8197, 0, 0, 0, -80, 32727, 0, 0, 336, 20482, 0, 0, 6655, -13312, 27649, 0, 0, 0, 0, 0)); __m512i test_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdv_epi16 // CHECK: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_shrdv_epi16((__m512i)(__v32hi){ -64, 65, 66, 67, 68, -69, 70, -71, -72, 73, 74, 75, -76, -77, -78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, 89, 90, -91, 92, 93, 94, 95}, (__m512i)(__v32hi){ -1, 2, -3, 4, 5, -6, -7, 8, 9, -10, 11, 12, 13, -14, 15, 16, -17, 18, 19, 20, 21, -22, -23, 24, 25, -26, 27, 28, -29, -30, -31, -32}, (__m512i)(__v32hi){ -32, -31, -30, -29, -28, 27, 26, 25, 24, -23, -22, 21, 20, 19, 18, -17, -16, -15, 14, 13, 12, -11, -10, -9, -8, 7, 6, -5, -4, 3, 2, -1}), -64, 32, 16400, -32760, 20484, -161, -448, 1151, 2559, -1280, 704, 24578, -8197, 24566, -20, 33, -80, 32727, 76, 167, 336, 20482, -23551, 12288, 6655, -13312, 27649, 927, -464, 16395, 16407, -64)); diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index a1c267dd51bf..9daecd0d9875 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -1,6 +1,9 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -831,6 +834,7 @@ __m256i test_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) { //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_mullo_epi32(__M, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_mullo_epi32(0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, 0, 0, 0, 0)); __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { @@ -839,12 +843,14 @@ __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_mullo_epi32(__W, __M, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_mullo_epi32((__m256i)(__v8si){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -500, +600, -700, +800)); __m256i test_mm256_and_epi32 (__m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_and_epi32 //CHECK: and <8 x i32> %{{.*}}, %{{.*}} return _mm256_and_epi32(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_and_epi32((__m256i)(__v4di){7, 7, 7, 7}, (__m256i)(__v4di){3, 3, 3, 3}), 3, 3, 3, 3)); __m256i test_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_and_epi32 @@ -865,6 +871,7 @@ __m128i test_mm_and_epi32 (__m128i __A, __m128i __B) { //CHECK: and <4 x i32> 
%{{.*}}, %{{.*}} return _mm_and_epi32(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_and_epi32((__m128i)(__v2di){7, 7}, (__m128i)(__v2di){3, 3}), 3, 3)); __m128i test_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_and_epi32 @@ -2838,275 +2845,519 @@ __mmask8 test_mm_mask_cmp_pd_mask_true_us(__mmask8 m, __m128d a, __m128d b) { __m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { // CHECK-LABEL: test_mm_mask_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_fmadd_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 14.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 2.0)); + +__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fmadd_pd + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + return _mm_mask3_fmadd_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 14.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 8.0, 6.0)); + +__m128d 
test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + // CHECK-LABEL: test_mm_maskz_fmadd_pd + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + return _mm_maskz_fmadd_pd(__U, __A, __B, __C); +} +TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 14.0)); +TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 0.0)); __m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { // CHECK-LABEL: test_mm_mask_fmsub_pd // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_fmsub_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 2.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 2.0)); -__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) +__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fmsub_pd + // CHECK: fneg <2 
x double> %{{.*}} + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask3_fmadd_pd(__A, __B, __C, __U); + return _mm_mask3_fmsub_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 2.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), -2.0, 6.0)); -__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fnmadd_pd +__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + // CHECK-LABEL: test_mm_maskz_fmsub_pd // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask3_fnmadd_pd(__A, __B, __C, __U); + return _mm_maskz_fmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 2.0)); +TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 0.0)); -__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: test_mm_maskz_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) +__m128d 
test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + // CHECK-LABEL: test_mm_mask_fnmadd_pd + // CHECK: fneg <2 x double> %{{.*}} + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_maskz_fmadd_pd(__U, __A, __B, __C); + return _mm_mask_fnmadd_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, -2.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 2.0, 2.0)); -__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - // CHECK-LABEL: test_mm_maskz_fmsub_pd +__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fnmadd_pd // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_maskz_fmsub_pd(__U, __A, __B, __C); + return _mm_mask3_fnmadd_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, -2.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 2.0, 6.0)); __m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { // CHECK-LABEL: 
test_mm_maskz_fnmadd_pd // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_fnmadd_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, -2.0)); +TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 2.0, 0.0)); + +__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + // CHECK-LABEL: test_mm_mask_fnmsub_pd + // CHECK: fneg <2 x double> %{{.*}} + // CHECK: fneg <2 x double> %{{.*}} + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + return _mm_mask_fnmsub_pd(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m128d(_mm_mask_fnmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, -14.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask_fnmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -8.0, 2.0)); + +__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fnmsub_pd + // CHECK: fneg <2 x double> %{{.*}} + // CHECK: fneg <2 x double> %{{.*}} + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 
x i32> <i32 0, i32 1> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} + return _mm_mask3_fnmsub_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, -14.0)); +TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), -8.0, 6.0)); __m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { // CHECK-LABEL: test_mm_maskz_fnmsub_pd // CHECK: fneg <2 x double> %{{.*}} // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_fnmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, -14.0)); +TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmsub_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -8.0, 0.0)); __m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { // CHECK-LABEL: test_mm256_mask_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_fmadd_pd(__A, __U, __B, __C); } 
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, 32.0, 44.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 14.0, 22.0, 3.0, 4.0)); + +__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fmadd_pd + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask3_fmadd_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, 32.0, 44.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), 14.0, 22.0, 11.0, 12.0)); + +__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + // CHECK-LABEL: test_mm256_maskz_fmadd_pd + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_fmadd_pd(__U, __A, __B, __C); +} +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmadd_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, 32.0, 44.0)); 
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmadd_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 14.0, 22.0, 0.0, 0.0)); __m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { // CHECK-LABEL: test_mm256_mask_fmsub_pd // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_fmsub_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -4.0, 2.0, 3.0, 4.0)); -__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) +__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fmsub_pd + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmadd_pd(__A, __B, __C, __U); + return 
_mm256_mask3_fmsub_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), -4.0, 2.0, 11.0, 12.0)); -__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fnmadd_pd +__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + // CHECK-LABEL: test_mm256_maskz_fmsub_pd // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U); + return _mm256_maskz_fmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmsub_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmsub_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -4.0, 2.0, 0.0, 0.0)); -__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: test_mm256_maskz_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) +__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + // CHECK-LABEL: 
test_mm256_mask_fnmadd_pd + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_fmadd_pd(__U, __A, __B, __C); + return _mm256_mask_fnmadd_pd(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 4.0, -2.0, 3.0, 4.0)); -__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: test_mm256_maskz_fmsub_pd +__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fnmadd_pd // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_fmsub_pd(__U, __A, __B, __C); + return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, 
(__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), 4.0, -2.0, 11.0, 12.0)); __m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { // CHECK-LABEL: test_mm256_maskz_fnmadd_pd // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmadd_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmadd_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 4.0, -2.0, 0.0, 0.0)); + +__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + // CHECK-LABEL: test_mm256_mask_fnmsub_pd + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_fnmsub_pd(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, -32.0, -44.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, 
(__m256d){ 9.0, 10.0, 11.0, 12.0 }), -14.0, -22.0, 3.0, 4.0)); + +__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fnmsub_pd + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: fneg <4 x double> %{{.*}} + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, -32.0, -44.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), -14.0, -22.0, 11.0, 12.0)); __m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { // CHECK-LABEL: test_mm256_maskz_fnmsub_pd // CHECK: fneg <4 x double> %{{.*}} // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmsub_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, -32.0, -44.0)); +TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmsub_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, 
(__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -14.0, -22.0, 0.0, 0.0)); __m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { // CHECK-LABEL: test_mm_mask_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_fmadd_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_mask_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, 32.0, 44.0)); +TEST_CONSTEXPR(match_m128(_mm_mask_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 14.0, 22.0, 3.0, 4.0)); + +__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fmadd_ps + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_mask3_fmadd_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m128(_mm_mask3_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, 32.0, 44.0)); +TEST_CONSTEXPR(match_m128(_mm_mask3_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), 14.0, 22.0, 11.0, 12.0)); + +__m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + // CHECK-LABEL: test_mm_maskz_fmadd_ps + // CHECK: 
call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_maskz_fmadd_ps(__U, __A, __B, __C); +} +TEST_CONSTEXPR(match_m128(_mm_maskz_fmadd_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, 32.0, 44.0)); +TEST_CONSTEXPR(match_m128(_mm_maskz_fmadd_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 14.0, 22.0, 0.0, 0.0)); __m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { // CHECK-LABEL: test_mm_mask_fmsub_ps // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_fmsub_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_mask_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m128(_mm_mask_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -4.0, 2.0, 3.0, 4.0)); -__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + // CHECK-LABEL: 
test_mm_mask3_fmsub_ps + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask3_fmadd_ps(__A, __B, __C, __U); + return _mm_mask3_fmsub_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m128(_mm_mask3_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m128(_mm_mask3_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), -4.0, 2.0, 11.0, 12.0)); -__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fnmadd_ps +__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + // CHECK-LABEL: test_mm_maskz_fmsub_ps // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask3_fnmadd_ps(__A, __B, __C, __U); + return _mm_maskz_fmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_maskz_fmsub_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, 10.0, 20.0)); +TEST_CONSTEXPR(match_m128(_mm_maskz_fmsub_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -4.0, 2.0, 0.0, 0.0)); -__m128 
test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: test_mm_maskz_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + // CHECK-LABEL: test_mm_mask_fnmadd_ps + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_fmadd_ps(__U, __A, __B, __C); + return _mm_mask_fnmadd_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_mask_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m128(_mm_mask_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 4.0, -2.0, 3.0, 4.0)); -__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - // CHECK-LABEL: test_mm_maskz_fmsub_ps +__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fnmadd_ps // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_fmsub_ps(__U, __A, __B, __C); + return _mm_mask3_fnmadd_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m128(_mm_mask3_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, 
(__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m128(_mm_mask3_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), 4.0, -2.0, 11.0, 12.0)); __m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { // CHECK-LABEL: test_mm_maskz_fnmadd_ps // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_fnmadd_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_maskz_fnmadd_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, -10.0, -20.0)); +TEST_CONSTEXPR(match_m128(_mm_maskz_fnmadd_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 4.0, -2.0, 0.0, 0.0)); + +__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + // CHECK-LABEL: test_mm_mask_fnmsub_ps + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_mask_fnmsub_ps(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m128(_mm_mask_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, -32.0, -44.0)); 
+TEST_CONSTEXPR(match_m128(_mm_mask_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -14.0, -22.0, 3.0, 4.0)); + +__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm_mask3_fnmsub_ps + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: fneg <4 x float> %{{.*}} + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_mask3_fnmsub_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m128(_mm_mask3_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, -32.0, -44.0)); +TEST_CONSTEXPR(match_m128(_mm_mask3_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), -14.0, -22.0, 11.0, 12.0)); __m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { // CHECK-LABEL: test_mm_maskz_fnmsub_ps // CHECK: fneg <4 x float> %{{.*}} // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_fnmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m128(_mm_maskz_fnmsub_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, -32.0, -44.0)); +TEST_CONSTEXPR(match_m128(_mm_maskz_fnmsub_ps((__mmask8)0b0011, 
(__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -14.0, -22.0, 0.0, 0.0)); __m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { // CHECK-LABEL: test_mm256_mask_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_fmadd_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_mask_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 5.0, 6.0, 7.0, 8.0)); + +__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fmadd_ps + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask3_fmadd_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m256(_mm256_mask3_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask3_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, 
(__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 26.0, 38.0, 52.0, 68.0, 21.0, 22.0, 23.0, 24.0)); + +__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + // CHECK-LABEL: test_mm256_maskz_fmadd_ps + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_maskz_fmadd_ps(__U, __A, __B, __C); +} +TEST_CONSTEXPR(match_m256(_mm256_maskz_fmadd_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 86.0, 106.0, 128.0, 152.0)); +TEST_CONSTEXPR(match_m256(_mm256_maskz_fmadd_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 0.0, 0.0, 0.0, 0.0)); __m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { // CHECK-LABEL: test_mm256_mask_fmsub_ps // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_fmsub_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_mask_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, 
(__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 5.0, 6.0, 7.0, 8.0)); -__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) +__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fmsub_ps + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmadd_ps(__A, __B, __C, __U); + return _mm256_mask3_fmsub_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m256(_mm256_mask3_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask3_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -8.0, 2.0, 14.0, 28.0, 21.0, 22.0, 23.0, 24.0)); -__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fnmadd_ps +__m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + // CHECK-LABEL: test_mm256_maskz_fmsub_ps // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x 
float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U); + return _mm256_maskz_fmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_fmsub_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 44.0, 62.0, 82.0, 104.0)); +TEST_CONSTEXPR(match_m256(_mm256_maskz_fmsub_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 0.0, 0.0, 0.0, 0.0)); -__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: test_mm256_maskz_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) +__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + // CHECK-LABEL: test_mm256_mask_fnmadd_ps + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_fmadd_ps(__U, __A, __B, __C); + return _mm256_mask_fnmadd_ps(__A, __U, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_mask_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 5.0, 6.0, 7.0, 8.0)); -__m256 
test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: test_mm256_maskz_fmsub_ps +__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fnmadd_ps // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_fmsub_ps(__U, __A, __B, __C); + return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U); } +TEST_CONSTEXPR(match_m256( _mm256_mask3_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m256( _mm256_mask3_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 8.0, -2.0, -14.0, -28.0, 21.0, 22.0, 23.0, 24.0)); __m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { // CHECK-LABEL: test_mm256_maskz_fnmadd_ps // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmadd_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 
20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -44.0, -62.0, -82.0, -104.0)); +TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmadd_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 0.0, 0.0, 0.0, 0.0)); + +__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + // CHECK-LABEL: test_mm256_mask_fnmsub_ps + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask_fnmsub_ps(__A, __U, __B, __C); +} +TEST_CONSTEXPR(match_m256(_mm256_mask_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 5.0, 6.0, 7.0, 8.0)); + +__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + // CHECK-LABEL: test_mm256_mask3_fnmsub_ps + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: fneg <8 x float> %{{.*}} + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U); +} +TEST_CONSTEXPR(match_m256(_mm256_mask3_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 
15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m256(_mm256_mask3_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -26.0, -38.0, -52.0, -68.0, 21.0, 22.0, 23.0, 24.0)); __m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { // CHECK-LABEL: test_mm256_maskz_fnmsub_ps // CHECK: fneg <8 x float> %{{.*}} // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmsub_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -86.0, -106.0, -128.0, -152.0)); +TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmsub_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 0.0, 0.0, 0.0, 0.0)); __m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { // CHECK-LABEL: test_mm_mask_fmaddsub_pd @@ -3283,41 +3534,6 @@ __m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C); } -__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fmsub_pd - // CHECK: 
fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask3_fmsub_pd(__A, __B, __C, __U); -} - -__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fmsub_pd - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmsub_pd(__A, __B, __C, __U); -} - -__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fmsub_ps - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask3_fmsub_ps(__A, __B, __C, __U); -} - -__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fmsub_ps - // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmsub_ps(__A, __B, __C, __U); -} - __m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { // CHECK-LABEL: test_mm_mask3_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} @@ -3353,119 +3569,6 @@ 
__m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U); } -__m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: test_mm_mask_fnmadd_pd - // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask_fnmadd_pd(__A, __U, __B, __C); -} - -__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: test_mm256_mask_fnmadd_pd - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fnmadd_pd(__A, __U, __B, __C); -} - -__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: test_mm_mask_fnmadd_ps - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_fnmadd_ps(__A, __U, __B, __C); -} - -__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: test_mm256_mask_fnmadd_ps - // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x 
float> %{{.*}} - return _mm256_mask_fnmadd_ps(__A, __U, __B, __C); -} - -__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { - // CHECK-LABEL: test_mm_mask_fnmsub_pd - // CHECK: fneg <2 x double> %{{.*}} - // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask_fnmsub_pd(__A, __U, __B, __C); -} - -__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fnmsub_pd - // CHECK: fneg <2 x double> %{{.*}} - // CHECK: fneg <2 x double> %{{.*}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1> - // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask3_fnmsub_pd(__A, __B, __C, __U); -} - -__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { - // CHECK-LABEL: test_mm256_mask_fnmsub_pd - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fnmsub_pd(__A, __U, __B, __C); -} - -__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fnmsub_pd - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: fneg <4 x double> %{{.*}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x 
double> %{{.*}}, <4 x double> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U); -} - -__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { - // CHECK-LABEL: test_mm_mask_fnmsub_ps - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_fnmsub_ps(__A, __U, __B, __C); -} - -__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm_mask3_fnmsub_ps - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: fneg <4 x float> %{{.*}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask3_fnmsub_ps(__A, __B, __C, __U); -} - -__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { - // CHECK-LABEL: test_mm256_mask_fnmsub_ps - // CHECK: fneg <8 x float> %{{.*}} - // CHECK: fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fnmsub_ps(__A, __U, __B, __C); -} - -__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - // CHECK-LABEL: test_mm256_mask3_fnmsub_ps - // CHECK: fneg <8 x float> %{{.*}} - // CHECK: fneg <8 x 
float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U); -} - __m128d test_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_add_pd // CHECK: fadd <2 x double> %{{.*}}, %{{.*}} @@ -3681,6 +3784,9 @@ __m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_mask_cvtepi32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepi32_pd((__m128d){-777.0, -777.0}, /*10=*/0x2, (__m128i)(__v4si){-1, 1, -2, 2}), -777.0, 1.0)); + __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -3688,42 +3794,63 @@ __m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_maskz_cvtepi32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepi32_pd(/*10=*/0x2, (__m128i)(__v4si){-1, 1, -2, 2}), 0.0, 1.0)); + __m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi32_pd // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_mask_cvtepi32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepi32_pd(_mm256_set1_pd(-777.0), /*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0, -777.0, -2.0, -777.0)); + __m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi32_pd // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return 
_mm256_maskz_cvtepi32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepi32_pd(/*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0, 0.0, -2.0, 0.0)); + __m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi32_ps // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm_mask_cvtepi32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_cvtepi32_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0f, -777.0f, -2.0f, -777.0f)); + __m128 test_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi32_ps // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} return _mm_maskz_cvtepi32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtepi32_ps(/*0101=*/0x5, (__m128i)(__v4si){-1, 1, -2, 2}), -1.0f, 0.0f, -2.0f, 0.0f)); + __m256 test_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi32_ps // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} return _mm256_mask_cvtepi32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_cvtepi32_ps(_mm256_set1_ps(-777.0f), /*1001 0101=*/0x95, (__m256i)(__v8si){-1, 1, -2, 2, -4, 4, -8, 8}), -1.0f, -777.0f, -2.0f, -777.0f, -4.0f, -777.0f, -777.0f, 8.0f)); + __m256 test_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi32_ps // CHECK: sitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} return _mm256_maskz_cvtepi32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_maskz_cvtepi32_ps(/*1001 0101=*/0x95, (__m256i)(__v8si){-1, 1, -2, 2, -4, 4, -8, 8}), -1.0f, 0.0f, -2.0f, 0.0f, -4.0f, 0.0f, 0.0f, 8.0f)); + __m128i 
test_mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_cvtpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128 @@ -3988,6 +4115,9 @@ __m128d test_mm_cvtepu32_pd(__m128i __A) { // CHECK: uitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepu32_pd(__A); } + +TEST_CONSTEXPR(match_m128d(_mm_cvtepu32_pd((__m128i)(__v4su){1, 2, 4, 8}), 1.0, 2.0)); + __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -3995,6 +4125,9 @@ __m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_mask_cvtepu32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepu32_pd((__m128d){-777.0, -777.0}, /*10=*/0x2, (__m128i)(__v4su){1, 2, 4, 8}), -777.0, 2.0)); + __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu32_pd // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> @@ -4002,57 +4135,87 @@ __m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> {{.*}}, <2 x double> {{.*}}, <2 x double> {{.*}} return _mm_maskz_cvtepu32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepu32_pd(/*10=*/0x2, (__m128i)(__v4su){1, 2, 4, 8}), 0.0, 2.0)); + __m256d test_mm256_cvtepu32_pd(__m128i __A) { // CHECK-LABEL: test_mm256_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepu32_pd(__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_cvtepu32_pd((__m128i)(__v4su){1, 1, 2, 2}), 1.0, 1.0, 2.0, 2.0)); + __m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return 
_mm256_mask_cvtepu32_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepu32_pd(_mm256_set1_pd(-777.0), /*1010=*/0xa, (__m128i)(__v4su){1, 1, 2, 2}), -777.0, 1.0, -777.0, 2.0)); + __m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu32_pd // CHECK: uitofp <4 x i32> %{{.*}} to <4 x double> // CHECK: select <4 x i1> {{.*}}, <4 x double> {{.*}}, <4 x double> {{.*}} return _mm256_maskz_cvtepu32_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepu32_pd(/*1010=*/0xa, (__m128i)(__v4su){1, 1, 2, 2}), 0.0, 1.0, 0.0, 2.0)); + __m128 test_mm_cvtepu32_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> return _mm_cvtepu32_ps(__A); } + +TEST_CONSTEXPR(match_m128(_mm_cvtepu32_ps((__m128i)(__v4su){1, 2, 4, 8}), 1.0f, 2.0f, 4.0f, 8.0f)); + __m128 test_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_cvtepu32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_cvtepu32_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*0101=*/0x5, (__m128i)(__v4su){1, 2, 4, 8}), 1.0f, -777.0f, 4.0f, -777.0f)); + __m128 test_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu32_ps // CHECK: uitofp <4 x i32> %{{.*}} to <4 x float> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_cvtepu32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_cvtepu32_ps(/*0101=*/0x5, (__m128i)(__v4su){1, 2, 4, 8}), 1.0f, 0.0f, 4.0f, 0.0f)); + __m256 test_mm256_cvtepu32_ps(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> return _mm256_cvtepu32_ps(__A); } + +TEST_CONSTEXPR(match_m256(_mm256_cvtepu32_ps((__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, 1.0f, 2.0f, 
2.0f, 4.0f, 4.0f, 8.0f, 8.0f)); + __m256 test_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_cvtepu32_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_cvtepu32_ps(_mm256_set1_ps(-777.0f), /*1010 0101=*/0xa5,(__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, -777.0f, 2.0f, -777.0f, -777.0f, 4.0f, -777.0f, 8.0f)); + __m256 test_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu32_ps // CHECK: uitofp <8 x i32> %{{.*}} to <8 x float> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_cvtepu32_ps(__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_maskz_cvtepu32_ps(/*1010 0101=*/0xa5,(__m256i)(__v8su){1, 1, 2, 2, 4, 4, 8, 8}), 1.0f, 0.0f, 2.0f, 0.0f, 0.0f, 4.0f, 0.0f, 8.0f)); + __m128d test_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_div_pd // CHECK: fdiv <2 x double> %{{.*}}, %{{.*}} @@ -4473,6 +4636,8 @@ __m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_abs_epi32((__m128i)(__v4si){99, 99, 99, 99}, (__mmask8)0x01, (__m128i)(__v4si){-1, 2, 2, 2}), 1, 99, 99, 99)); + __m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_abs_epi32 // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) @@ -4481,6 +4646,8 @@ __m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_abs_epi32((__mmask8)0x05, (__m128i)(__v4si){-1, 2, -3, 4}), 1, 0, 3, 0)); + 
__m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_abs_epi32 // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) @@ -4489,6 +4656,8 @@ __m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_abs_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask8)0x01, (__m256i)(__v8si){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99)); + __m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_abs_epi32 // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) @@ -4497,40 +4666,54 @@ __m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_abs_epi32((__mmask8)0x01, (__m256i)(__v8si){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_abs_epi64(__m128i __A) { // CHECK-LABEL: test_mm_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) return _mm_abs_epi64(__A); } +TEST_CONSTEXPR(match_v2di(_mm_abs_epi64((__m128i)(__v2di){+5, -3}), 5, 3)); + __m128i test_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_mask_abs_epi64(__W,__U,__A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_abs_epi64((__m128i)(__v2di){99, 99}, (__mmask8)0x01, (__m128i)(__v2di){-1, 2}), 1, 99)); + __m128i test_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<2 
x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_maskz_abs_epi64(__U,__A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_abs_epi64((__mmask8)0x01, (__m128i)(__v2di){-1, 2}), 1, 0)); + __m256i test_mm256_abs_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) return _mm256_abs_epi64(__A); } +TEST_CONSTEXPR(match_v4di(_mm256_abs_epi64((__m256i)(__v4di){+5, -3, -1000, 1000}), 5, 3, 1000, 1000)); + __m256i test_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_mask_abs_epi64(__W,__U,__A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_abs_epi64((__m256i)(__v4di){99, 99, 99, 99}, (__mmask8)0x01, (__m256i)(__v4di){-1, 2, 2, 2}), 1, 99, 99, 99)); + __m256i test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_abs_epi64 // CHECK: [[ABS:%.*]] = call {{.*}}<4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_maskz_abs_epi64(__U,__A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_abs_epi64((__mmask8)0x01, (__m256i)(__v4di){-1, 2, 2, 2}), 1, 0, 0, 0)); + __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4539,6 +4722,9 @@ __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epi32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4si(_mm_maskz_max_epi32(0x01, (__m128i)(__v4si){+1, -2, +3, -4}, 
(__m128i)(__v4si){-1, +2, -3, +4}), +1, 0, 0, 0)); + __m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epi32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4547,6 +4733,9 @@ __m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epi32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v4si(_mm_mask_max_epi32((__m128i)(__v4si){-100, +200, -300, +400}, 0x01, (__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-1, +2, -3, +4}), +1, +200, -300, +400)); + __m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epi32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4555,6 +4744,9 @@ __m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epi32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8si(_mm256_maskz_max_epi32(0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epi32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4563,40 +4755,61 @@ __m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epi32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8si(_mm256_mask_max_epi32((__m256i)(__v8si){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), +1, +2, +3, +4, 
-500, +600, -700, +800)); + __m128i test_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epi64(__M,__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_maskz_max_epi64(0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), +1, 0)); + __m128i test_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epi64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_mask_max_epi64((__m128i)(__v2di){-100, +200}, 0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), +1, +200)); + __m128i test_mm_max_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_max_epi64(__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_max_epi64((__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), +1, +2)); + __m256i test_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epi64(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_max_epi64(0x01, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), +1, 0, 0, 0)); + __m256i test_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 
x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epi64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_mask_max_epi64((__m256i)(__v4di){-100, +200, -300, +400}, 0x01, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), +1, +200, -300, +400)); + __m256i test_mm256_max_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_max_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_max_epi64(__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_max_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), +1, +2, +3, +4)); + __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epu32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4605,6 +4818,9 @@ __m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epu32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4su(_mm_maskz_max_epu32(0x01, (__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 3, 0, 0, 0)); + __m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epu32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4613,6 +4829,9 @@ __m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epu32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v4su(_mm_mask_max_epu32((__m128i)(__v4su){100, 200, 300, 400}, 0x01, (__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 3, 200, 300, 400)); + __m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epu32 // CHECK: 
[[RES:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4621,6 +4840,9 @@ __m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epu32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8su(_mm256_maskz_max_epu32(0x0F, (__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epu32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4629,40 +4851,61 @@ __m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epu32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8su(_mm256_mask_max_epu32((__m256i)(__v8su){100, 200, 300, 400, 500, 600, 700, 800}, 0x0F, (__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 1, 2, 3, 4, 500, 600, 700, 800)); + __m128i test_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_max_epu64(__M,__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_maskz_max_epu64(0x01, (__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 1, 0)); + __m128i test_mm_max_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_max_epu64(__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_max_epu64((__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 1, 3)); + __m128i test_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i 
__A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umax.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_max_epu64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_mask_max_epu64((__m128i)(__v2du){100, 200}, 0x01, (__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 1, 200)); + __m256i test_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_max_epu64(__M,__A,__B); } + +TEST_CONSTEXPR(match_m256i(_mm256_maskz_max_epu64(0x01, (__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 1, 0, 0, 0)); + __m256i test_mm256_max_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_max_epu64(__A,__B); } + +TEST_CONSTEXPR(match_m256i(_mm256_max_epu64((__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 1, 2, 4, 4)); + __m256i test_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_max_epu64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_m256i(_mm256_mask_max_epu64((__m256i)(__v4du){100, 200, 300, 400}, 0x01, (__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 1, 200, 300, 400)); + __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epi32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ 
-4671,6 +4914,9 @@ __m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epi32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4si(_mm_maskz_min_epi32(0x01, (__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), -1, 0, 0, 0)); + __m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epi32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4679,6 +4925,9 @@ __m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epi32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v4si(_mm_mask_min_epi32((__m128i)(__v4si){-100, +200, -300, +400}, 0x01, (__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), -1, +200, -300, +400)); + __m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epi32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4687,6 +4936,9 @@ __m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epi32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8si(_mm256_maskz_min_epi32(0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epi32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4695,40 +4947,61 @@ __m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256 // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> 
{{.*}} return _mm256_mask_min_epi32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8si(_mm256_mask_min_epi32((__m256i)(__v8si){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-1, +2, -3, +4, -5, +6, -7, +8}), -1, -2, -3, -4, -500, +600, -700, +800)); + __m128i test_mm_min_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_min_epi64(__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_min_epi64((__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), -1, -2)); + __m128i test_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epi64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_mask_min_epi64((__m128i)(__v2di){-100, +200}, 0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), -1, +200)); + __m128i test_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.smin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epi64(__M,__A,__B); } + +TEST_CONSTEXPR(match_v2di(_mm_maskz_min_epi64(0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-1, +2}), -1, 0)); + __m256i test_mm256_min_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_min_epi64(__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_min_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), -1, -2, -3, -4)); + __m256i 
test_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_min_epi64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_mask_min_epi64((__m256i)(__v4di){}, 0x01, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), -1, 0, 0, 0)); + __m256i test_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epi64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.smin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epi64(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_min_epi64(0x01, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-1, +2, -3, +4}), -1, 0, 0, 0)); + __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epu32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4737,6 +5010,9 @@ __m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epu32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v4su(_mm_maskz_min_epu32(0x01, (__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 1, 0, 0, 0)); + __m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epu32 // CHECK: [[RES:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) @@ -4745,6 +5021,9 @@ __m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epu32(__W,__M,__A,__B); } + 
+TEST_CONSTEXPR(match_v4su(_mm_mask_min_epu32((__m128i)(__v4su){100, 200, 300, 400}, 0x01, (__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 1, 200, 300, 400)); + __m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epu32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4753,6 +5032,9 @@ __m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epu32(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8su(_mm256_maskz_min_epu32(0x0F, (__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 0, 1, 2, 3, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epu32 // CHECK: [[RES:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) @@ -4761,40 +5043,61 @@ __m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256 // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epu32(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8su(_mm256_mask_min_epu32((__m256i)(__v8su){100, 200, 300, 400, 500, 600, 700, 800}, 0x0F, (__m256i)(__v8su){1, 2, 3, 4, 5, 6, 7, 8}, (__m256i)(__v8su){0, 1, 2, 3, 4, 5, 6, 7}), 0, 1, 2, 3, 500, 600, 700, 800)); + __m128i test_mm_min_epu64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_min_epu64(__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_min_epu64((__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 0, 2)); + __m128i test_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, 
<2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_mask_min_epu64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_mask_min_epu64((__m128i)(__v2du){100, 200}, 0x01, (__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 0, 200)); + __m128i test_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CHECK: select <2 x i1> {{.*}}, <2 x i64> [[RES]], <2 x i64> {{.*}} return _mm_maskz_min_epu64(__M,__A,__B); } + +TEST_CONSTEXPR(match_m128i(_mm_maskz_min_epu64(0x01, (__m128i)(__v2du){0, 3}, (__m128i)(__v2du){1, 2}), 0, 0)); + __m256i test_mm256_min_epu64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_min_epu64(__A,__B); } + +TEST_CONSTEXPR(match_m256i(_mm256_min_epu64((__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 0, 1, 3, 3)); + __m256i test_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_mask_min_epu64(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_m256i(_mm256_mask_min_epu64((__m256i)(__v4du){100, 200, 300, 400}, 0x01, (__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 0, 200, 300, 400)); + __m256i test_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epu64 // CHECK: [[RES:%.*]] = call {{.*}}<4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) // CHECK: select <4 x i1> {{.*}}, <4 x i64> [[RES]], <4 x i64> {{.*}} return _mm256_maskz_min_epu64(__M,__A,__B); } + 
+TEST_CONSTEXPR(match_m256i(_mm256_maskz_min_epu64(0x01, (__m256i)(__v4du){0, 1, 4, 3}, (__m256i)(__v4du){1, 2, 3, 4}), 0, 0, 0, 0)); + __m128d test_mm_roundscale_pd(__m128d __A) { // CHECK-LABEL: test_mm_roundscale_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128 @@ -5641,6 +5944,7 @@ __m128i test_mm_rol_epi32(__m128i __A) { // CHECK: @llvm.fshl.v4i32 return _mm_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97)); __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_rol_epi32 @@ -5648,6 +5952,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_rol_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99)); __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_rol_epi32 @@ -5655,12 +5960,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97)); __m256i test_mm256_rol_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_rol_epi32 // CHECK: @llvm.fshl.v8i32 return _mm256_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256)); __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_rol_epi32 @@ -5668,6 +5975,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rol_epi32(__W, __U, __A, 5); } 
+TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99)); __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_rol_epi32 @@ -5675,12 +5983,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0)); __m128i test_mm_rol_epi64(__m128i __A) { // CHECK-LABEL: test_mm_rol_epi64 // CHECK: @llvm.fshl.v2i64 return _mm_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881)); __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_rol_epi64 @@ -5688,6 +5998,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99)); __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_rol_epi64 @@ -5695,12 +6006,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rol_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -5242881)); __m256i test_mm256_rol_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_rol_epi64 // CHECK: @llvm.fshl.v4i64 return _mm256_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_rol_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, -5242881, 
-5767169, 6815744)); __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_rol_epi64 @@ -5708,6 +6021,7 @@ __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rol_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, 99, 99, 6815744)); __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_rol_epi64 @@ -5715,12 +6029,14 @@ __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rol_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -5767169, 6815744)); __m128i test_mm_rolv_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rolv_epi32 // CHECK: llvm.fshl.v4i32 return _mm_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_rolv_epi32((__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, -1073741824, 2147483646)); __m128i test_mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rolv_epi32 @@ -5728,6 +6044,7 @@ __m128i test_mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rolv_epi32((__m128i)(__v4si){ 999, 999, 999, 999}, 0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, 999, 999)); __m128i test_mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rolv_epi32 @@ -5735,12 +6052,14 @@ __m128i test_mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i 
__B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rolv_epi32(0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 16, 16, 0, 0)); __m256i test_mm256_rolv_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rolv_epi32 // CHECK: @llvm.fshl.v8i32 return _mm256_rolv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_rolv_epi32((__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, -33554433, 201326592, 128, -65, -1073741824, -25, 16)); __m256i test_mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rolv_epi32 @@ -5748,6 +6067,7 @@ __m256i test_mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rolv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_rolv_epi32((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 999, 999, 128, 999, -1073741824, -25, 999)); __m256i test_mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rolv_epi32 @@ -5755,12 +6075,14 @@ __m256i test_mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_rolv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rolv_epi32(0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 0, 0, 128, 0, -1073741824, -25, 0)); __m128i test_mm_rolv_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rolv_epi64 // CHECK: @llvm.fshl.v2i64 return _mm_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_rolv_epi64((__m128i)(__v2di){ 
1, 2}, (__m128i)(__v2di){ 2, 1}), 4, 4)); __m128i test_mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rolv_epi64 @@ -5768,6 +6090,7 @@ __m128i test_mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rolv_epi64((__m128i)(__v2di){ 999, 999}, 0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 999, 4)); __m128i test_mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rolv_epi64 @@ -5775,12 +6098,14 @@ __m128i test_mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rolv_epi64(0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 0, 4)); __m256i test_mm256_rolv_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rolv_epi64 // CHECK: @llvm.fshl.v4i64 return _mm256_rolv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_rolv_epi64((__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 16, 9223372036854775807LL, 8)); __m256i test_mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rolv_epi64 @@ -5788,6 +6113,7 @@ __m256i test_mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rolv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rolv_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 999, 9223372036854775807LL, 8)); __m256i test_mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rolv_epi64 @@ -5795,12 +6121,14 
@@ __m256i test_mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rolv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rolv_epi64(0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 0, 9223372036854775807LL, 8)); __m128i test_mm_ror_epi32(__m128i __A) { // CHECK-LABEL: test_mm_ror_epi32 // CHECK: @llvm.fshr.v4i32 return _mm_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_ror_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 402653184, -402653185)); __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_ror_epi32 @@ -5808,6 +6136,7 @@ __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_mask_ror_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 99, 99)); __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_ror_epi32 @@ -5815,12 +6144,14 @@ __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_ror_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, 0, 0, -402653185)); __m256i test_mm256_ror_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_ror_epi32 // CHECK: @llvm.fshr.v8i32 return _mm256_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_ror_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824)); __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_ror_epi32 @@ -5828,6 
+6159,7 @@ __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_ror_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99)); __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_ror_epi32 @@ -5835,12 +6167,14 @@ __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_ror_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, 0, -536870913, 805306368, 0, 0)); __m128i test_mm_ror_epi64(__m128i __A) { // CHECK-LABEL: test_mm_ror_epi64 // CHECK: @llvm.fshr.v2i64 return _mm_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_ror_epi64(((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, -351843720888321LL)); __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_ror_epi64 @@ -5848,6 +6182,7 @@ __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_ror_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_mask_ror_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, 99)); __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_ror_epi64 @@ -5855,12 +6190,14 @@ __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_ror_epi64(__U, __A, 5); } 
+TEST_CONSTEXPR(match_v2di(_mm_maskz_ror_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -351843720888321LL)); __m256i test_mm256_ror_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_ror_epi64 // CHECK: @llvm.fshr.v4i64 return _mm256_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_ror_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, -351843720888321LL, -387028092977153LL, 457396837154816LL)); __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_ror_epi64 @@ -5868,6 +6205,7 @@ __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_ror_epi64(__W, __U, __A,5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_ror_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, 99, 99, 457396837154816LL)); __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_ror_epi64 @@ -5875,13 +6213,14 @@ __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_ror_epi64(__U, __A, 5); } - +TEST_CONSTEXPR(match_v4di(_mm256_maskz_ror_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -387028092977153LL, 457396837154816LL)); __m128i test_mm_rorv_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rorv_epi32 // CHECK: @llvm.fshr.v4i32 return _mm_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_rorv_epi32((__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 12, -7)); __m128i test_mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rorv_epi32 @@ -5889,6 +6228,7 @@ __m128i test_mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} 
return _mm_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rorv_epi32((__m128i)(__v4si){ 999, 999, 999, 999}, 0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 999, 999)); __m128i test_mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rorv_epi32 @@ -5896,12 +6236,14 @@ __m128i test_mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rorv_epi32(0x3, (__m128i)(__v4si){ 1, 2, 3, -4}, (__m128i)(__v4si){ 4, 3, -2, -1}), 268435456, 1073741824, 0, 0)); __m256i test_mm256_rorv_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rorv_epi32 // CHECK: @llvm.fshr.v8i32 return _mm256_rorv_epi32(__A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_rorv_epi32((__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, -129, 192, 536870912, -1073741825, 48, 2147483646, 4)); __m256i test_mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rorv_epi32 @@ -5909,6 +6251,7 @@ __m256i test_mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rorv_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_rorv_epi32((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 999, 999, 536870912, 999, 48, 2147483646, 999)); __m256i test_mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rorv_epi32 @@ -5916,12 +6259,14 @@ __m256i test_mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 
x i32> %{{.*}} return _mm256_maskz_rorv_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rorv_epi32(0x69, (__m256i)(__v8si){ -1, -2, 3, 4, -5, 6, -7, 8}, (__m256i)(__v8si){ 8, -7, -6, 5, 4, -3, 2, 1}), -1, 0, 0, 536870912, 0, 48, 2147483646, 0)); __m128i test_mm_rorv_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rorv_epi64 // CHECK: @llvm.fshr.v2i64 return _mm_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_rorv_epi64((__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 4611686018427387904LL, 1)); __m128i test_mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_rorv_epi64 @@ -5929,6 +6274,7 @@ __m128i test_mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rorv_epi64((__m128i)(__v2di){ 999, 999}, 0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 999, 1)); __m128i test_mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_rorv_epi64 @@ -5936,12 +6282,14 @@ __m128i test_mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rorv_epi64(0x2, (__m128i)(__v2di){ 1, 2}, (__m128i)(__v2di){ 2, 1}), 0, 1)); __m256i test_mm256_rorv_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_rorv_epi64 // CHECK: @llvm.fshr.v4i64 return _mm256_rorv_epi64(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_rorv_epi64((__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 4611686018427387904LL, -9, 2)); __m256i test_mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_rorv_epi64 @@ -5949,6 +6297,7 @@ __m256i test_mm256_mask_rorv_epi64(__m256i __W, 
__mmask8 __U, __m256i __A, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rorv_epi64(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rorv_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 999, -9, 2)); __m256i test_mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_rorv_epi64 @@ -5956,6 +6305,7 @@ __m256i test_mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rorv_epi64(__U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rorv_epi64(0xD, (__m256i)(__v4di){ -1, 2, -3, 4}, (__m256i)(__v4di){ 4, 3, -2, 1}), -1, 0, -9, 2)); __m128i test_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_sllv_epi64 @@ -5963,6 +6313,7 @@ __m128i test_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_sllv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_sllv_epi64((__m128i)(__v2di){99, 99}, (__mmask8)0x1, (__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 256, 99)); __m128i test_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_sllv_epi64 @@ -5970,6 +6321,7 @@ __m128i test_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_sllv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_m128i(_mm_maskz_sllv_epi64((__mmask8)0x2, (__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 0x8000000000000000ULL)); __m256i test_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_sllv_epi64 @@ -5977,6 +6329,7 @@ __m256i test_mm256_mask_sllv_epi64(__m256i 
__W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_sllv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_sllv_epi64((__m256i)(__v4di){99, 99, 99, 99}, (__mmask8)0x6, (__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 99, -8, 24, 99)); __m256i test_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_sllv_epi64 @@ -5984,6 +6337,7 @@ __m256i test_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_sllv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_sllv_epi64((__mmask8)0x9, (__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 2, 0, 0, 0)); __m128i test_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_sllv_epi32 @@ -5991,6 +6345,7 @@ __m128i test_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_sllv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_mask_sllv_epi32((__m128i)(__v4si){99, 99, 99, 99}, (__mmask8)0x0E, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 99, -8, 24, 0)); __m128i test_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_sllv_epi32 @@ -5998,6 +6353,7 @@ __m128i test_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_sllv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_sllv_epi32((__mmask8)0xC, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 0, 24, 0)); __m256i test_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_sllv_epi32 @@ -6005,6 +6361,7 @@ 
__m256i test_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_sllv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_sllv_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask8)0x3C, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 99, 99, 24, -64, 0, 0, 99, 99)); __m256i test_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_sllv_epi32 @@ -6012,6 +6369,7 @@ __m256i test_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_sllv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_sllv_epi32((__mmask8)0xFE, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -8, 24, -64, 0, 0, 0, 0)); __m128i test_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_srlv_epi64 @@ -6019,6 +6377,7 @@ __m128i test_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_srlv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_m128i(_mm_mask_srlv_epi64((__m128i)(__v2di){99, 99}, (__mmask8)0x1, (__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 99)); __m128i test_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_srlv_epi64 @@ -6026,6 +6385,7 @@ __m128i test_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_srlv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_m128i(_mm_maskz_srlv_epi64((__mmask8)0x2, (__m128i)(__v2di){1, -3}, (__m128i)(__v2di){8, 63}), 0, 1)); __m256i test_mm256_mask_srlv_epi64(__m256i __W, 
__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_srlv_epi64 @@ -6033,6 +6393,7 @@ __m256i test_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srlv_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_m256i(_mm256_mask_srlv_epi64((__m256i)(__v4di){99, 99, 99, 99}, (__mmask8)0x6, (__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 99, 0x3FFFFFFFFFFFFFFFULL, 0, 99)); __m256i test_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_srlv_epi64 @@ -6040,6 +6401,7 @@ __m256i test_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srlv_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_m256i(_mm256_maskz_srlv_epi64((__mmask8)0x1, (__m256i)(__v4di){1, -2, 3, -4}, (__m256i)(__v4di){1, 2, 3, -4}), 0, 0, 0, 0)); __m128i test_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_srlv_epi32 @@ -6047,6 +6409,7 @@ __m128i test_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_srlv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_mask_srlv_epi32((__m128i)(__v4si){99, 99, 99, 99}, (__mmask8)0x0E, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 99, 1073741823, 0, 0)); __m128i test_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_srlv_epi32 @@ -6054,6 +6417,7 @@ __m128i test_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_srlv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_srlv_epi32((__mmask8)0xC, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, 
-4}), 0, 0, 0, 0)); __m256i test_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_srlv_epi32 @@ -6061,6 +6425,7 @@ __m256i test_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_srlv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_srlv_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask8)0x3C, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 99, 99, 0, 268435455, 0, 1, 99, 99)); __m256i test_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_srlv_epi32 @@ -6068,6 +6433,7 @@ __m256i test_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_srlv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srlv_epi32((__mmask8)0x9E, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, 1073741823, 0, 268435455, 0, 0, 0, 7)); __m128i test_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srl_epi32 @@ -6131,6 +6497,7 @@ __m256i test_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_srli_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_srli_epi32((__m256i)(__v8si){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0xff, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 3), 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_srli_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_srli_epi32_2 @@ -6145,6 +6512,7 @@ __m256i test_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> 
%{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_srli_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srli_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0)); __m256i test_mm256_maskz_srli_epi32_2(__mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_srli_epi32_2 @@ -6214,6 +6582,7 @@ __m256i test_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srli_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_srli_epi64((__m256i)(__v4di){100, 101, 102, 103}, (__mmask8)0b1010, (__m256i)(__v4di){0, 0xff80, 2, 3}, 1), 100, 0x7fc0, 102, 0x1)); __m256i test_mm256_mask_srli_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_srli_epi64_2 @@ -6228,6 +6597,7 @@ __m256i test_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srli_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srli_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0x7f, 0, 0, 0)); __m256i test_mm256_maskz_srli_epi64_2(__mmask8 __U,__m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_srli_epi64_2 @@ -6298,6 +6668,7 @@ __m256i test_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_slli_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_slli_epi32((__m256i)(__v8si){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0xff, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 3), 0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38)); __m256i test_mm256_mask_slli_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_slli_epi32_2 @@ -6312,6 +6683,11 @@ __m256i 
test_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_slli_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x00ffcc71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 32), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0xff, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x7, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0x2, 0x4, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_slli_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x1fe, 0, 0, 0, 0x8, 0xa, 0xc, 0)); __m256i test_mm256_maskz_slli_epi32_2(__mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_slli_epi32_2 @@ -6382,6 +6758,7 @@ __m256i test_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_slli_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_slli_epi64((__m256i)(__v4di){100, 101, 102, 103}, (__mmask8)0b1010, (__m256i)(__v4di){0, 1, 2, 3}, 4), 100, 0x10, 102, 0x30)); __m256i test_mm256_mask_slli_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_slli_epi64_2 @@ -6396,6 +6773,11 @@ __m256i test_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_slli_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x00ffcc71, (__m256i)(__v4di){0xff, 1, 2, 3}, 64), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0, 
(__m256i)(__v4di){0xff, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0xff, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0x1fe, 0x2, 0x4, 0x6)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x7, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0x1fe, 0x2, 0x4, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_slli_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0x1fe, 0, 0, 0)); __m256i test_mm256_maskz_slli_epi64_2(__mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_slli_epi64_2 @@ -6410,6 +6792,7 @@ __m128i test_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_srav_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_mask_srav_epi32((__m128i)(__v4si){99, 99, 99, 99}, (__mmask8)0x0E, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 99, -1, 0, -1)); __m128i test_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_srav_epi32 @@ -6417,6 +6800,7 @@ __m128i test_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_srav_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_srav_epi32((__mmask8)0xC, (__m128i)(__v4si){1, -2, 3, -4}, (__m128i)(__v4si){1, 2, 3, -4}), 0, 0, 0, -1)); __m256i test_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_srav_epi32 @@ -6424,6 +6808,7 @@ __m256i test_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_srav_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_srav_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask8)0x3C, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, 
-17, 31, 33, 29}), 99, 99, 0, -1, 0, -1, 99, 99)); __m256i test_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_srav_epi32 @@ -6431,12 +6816,14 @@ __m256i test_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_srav_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srav_epi32((__mmask8)0x9E, (__m256i)(__v8si){1, -2, 3, -4, 5, -6, 7, -8}, (__m256i)(__v8si){1, 2, 3, 4, -17, 31, 33, 29}), 0, -1, 0, -1, 0, 0, 0, -1)); __m128i test_mm_srav_epi64(__m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.128 return _mm_srav_epi64(__X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_srav_epi64((__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 2, 1)); __m128i test_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_mask_srav_epi64 @@ -6444,6 +6831,7 @@ __m128i test_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_mask_srav_epi64((__m128i)(__v2di){ 999, 999}, 0x0, (__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 999, 999)); __m128i test_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_srav_epi64 @@ -6451,12 +6839,14 @@ __m128i test_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_srav_epi64(0x0, (__m128i)(__v2di){ 4, 5}, (__m128i)(__v2di){ 1, 2}), 0, 0)); __m256i test_mm256_srav_epi64(__m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.256 return _mm256_srav_epi64(__X, __Y); } 
+TEST_CONSTEXPR(match_v4di(_mm256_srav_epi64((__m256i)(__v4di){ -8, 9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), -4, 2, -1, -1)); __m256i test_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_srav_epi64 @@ -6464,6 +6854,7 @@ __m256i test_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m25 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srav_epi64(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_srav_epi64((__m256i)(__v4di){ 999, 999, 999, 999}, 0xA, (__m256i)(__v4di){ -8, 9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), 999, 2, 999, -1)); __m256i test_mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_srav_epi64 @@ -6471,6 +6862,7 @@ __m256i test_mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srav_epi64(__U, __X, __Y); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srav_epi64(0xA, (__m256i)(__v4di){ -8, 9, -10, -11}, (__m256i)(__v4di){ 1, 2, -3, -4}), 0, 2, 0, -1)); void test_mm_store_epi32(void *__P, __m128i __A) { // CHECK-LABEL: test_mm_store_epi32 @@ -7684,6 +8076,7 @@ __m256i test_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_srai_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_srai_epi32((__m256i)(__v8si){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0xff, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, 3), 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_srai_epi32_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_srai_epi32_2 @@ -7698,6 +8091,7 @@ __m256i test_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return 
_mm256_maskz_srai_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_srai_epi32((__mmask8)0x71, (__m256i)(__v8si){0xff, 1, 2, 3, 4, 5, 6, 7}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0)); __m256i test_mm256_maskz_srai_epi32_2(__mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_srai_epi32_2 @@ -7751,6 +8145,7 @@ __m128i test_mm_srai_epi64(__m128i __A) { // CHECK: @llvm.x86.avx512.psrai.q.128 return _mm_srai_epi64(__A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_srai_epi64((__m128i)(__v2di){-32768, -3}, 1), -16384, -2)); __m128i test_mm_srai_epi64_2(__m128i __A, unsigned int __B) { // CHECK-LABEL: test_mm_srai_epi64_2 @@ -7791,6 +8186,7 @@ __m256i test_mm256_srai_epi64(__m256i __A) { // CHECK: @llvm.x86.avx512.psrai.q.256 return _mm256_srai_epi64(__A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_srai_epi64((__m256i)(__v4di){-32768, 32767, -3, -2}, 1), -16384, 16383, -2, -1)); __m256i test_mm256_srai_epi64_2(__m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_srai_epi64_2 @@ -7804,6 +8200,7 @@ __m256i test_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srai_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_srai_epi64((__m256i)(__v4di){100, 101, 102, 103}, (__mmask8)0b1010, (__m256i)(__v4di){0,-128, 2, 3}, 2), 100, -32, 102, 0x0)); __m256i test_mm256_mask_srai_epi64_2(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_srai_epi64_2 @@ -7818,6 +8215,7 @@ __m256i test_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srai_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_srai_epi64((__mmask8)0x71, (__m256i)(__v4di){0xff, 1, 2, 3}, 1), 0x7f, 0, 0, 0)); __m256i test_mm256_maskz_srai_epi64_2(__mmask8 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: 
test_mm256_maskz_srai_epi64_2 diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c index e2cfb3a348a9..e0b55c6fde81 100644 --- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c @@ -3,6 +3,12 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d8a2d1edf8af..d62235a630fd 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -1,9 
+1,16 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none 
-ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s #include <immintrin.h> #include "builtin_test_helpers.h" @@ -854,6 +861,7 @@ __m256i test_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __ //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_mullo_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_mullo_epi16((__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, 0x00FF, (__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, -9, +10, -11, +12, -13, +14, -15, +16)); __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { 
//CHECK-LABEL: test_mm256_maskz_mullo_epi16 @@ -861,6 +869,7 @@ __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_mullo_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mullo_epi16(0x00FF, (__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_mullo_epi16 @@ -868,6 +877,7 @@ __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128 //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_mullo_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_mullo_epi16((__m128i)(__v8hi){-1, +2, -3, +4, -5, +6, -7, +8}, 0x0F, (__m128i)(__v8hi){+2, -3, +4, -5, +6, -7, +8, -9}, (__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, +10}), -6, -12, -20, -30, -5, +6, -7, +8)); __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_mullo_epi16 @@ -875,6 +885,7 @@ __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_mullo_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_mullo_epi16(0x0F, (__m128i)(__v8hi){+2, -3, +4, -5, +6, -7, +8, -9}, (__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, +10}), -6, -12, -20, -30, 0, 0, 0, 0)); __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { @@ -908,6 +919,7 @@ __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } 
+TEST_CONSTEXPR(match_v16qi(_mm_mask_abs_epi8((__m128i)(__v16qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x0001, (__m128i)(__v16qi){(char)-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_abs_epi8 @@ -917,6 +929,7 @@ __m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_abs_epi8((__mmask16)0x5555, (__m128i)(__v16qi){(char)-1, 2, (char)-3, 4, (char)-5, 6, (char)-7, 8, (char)-9, 10, (char)-11, 12, (char)-13, 14, (char)-15, 16}), 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0)); __m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_abs_epi8 @@ -926,6 +939,7 @@ __m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_abs_epi8((__m256i)(__v32qi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask32)0x00000001, (__m256i)(__v32qi){(char)-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_abs_epi8 @@ -935,6 +949,7 @@ __m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } 
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_abs_epi8((__mmask32)0x55555555, (__m256i)(__v32qi){(char)-1, 2, (char)-3, 4, (char)-5, 6, (char)-7, 8, (char)-9, 10, (char)-11, 12, (char)-13, 14, (char)-15, 16, (char)-17, 18, (char)-19, 20, (char)-21, 22, (char)-23, 24, (char)-25, 26, (char)-27, 28, (char)-29, 30, (char)-31, 32}), 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 25, 0, 27, 0, 29, 0, 31, 0)); __m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_abs_epi16 @@ -944,6 +959,7 @@ __m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_abs_epi16((__m128i)(__v8hi){99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x01, (__m128i)(__v8hi){-1, 2, 2, 2, 2, 2, 2, 2}), 1, 99, 99, 99, 99, 99, 99, 99)); __m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_abs_epi16 @@ -953,6 +969,7 @@ __m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_abs_epi16((__mmask8)0x55, (__m128i)(__v8hi){-1, 2, -3, 4, -5, 6, -7, 8}), 1, 0, 3, 0, 5, 0, 7, 0)); __m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_abs_epi16 @@ -962,6 +979,7 @@ __m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_abs_epi16((__m256i)(__v16hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, (__mmask16)0x0001, (__m256i)(__v16hi){-128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 128, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 
99)); __m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_abs_epi16 @@ -971,6 +989,7 @@ __m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_abs_epi16((__mmask16)0x0001, (__m256i)(__v16hi){-128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}), 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_packs_epi32 @@ -1187,48 +1206,64 @@ __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_avg_epu8((__m128i)(__v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu8 // CHECK: @llvm.x86.sse2.pavg.b // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_avg_epu8(0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_avg_epu8(__W,__U,__A,__B); } 
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_avg_epu8((__m256i)(__v32qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_avg_epu8(0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_avg_epu16((__m128i)(__v8hi){0, 1, 2, 3, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return 
_mm_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_avg_epu16(0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_avg_epu16((__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_avg_epu16(0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1237,6 +1272,9 @@ __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epi8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qi(_mm_maskz_max_epi8(0x00FF, (__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, 
-14, +15, -16}), +1, +2, +3, +4, +5, +6, +7, +8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1245,6 +1283,9 @@ __m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qi(_mm_mask_max_epi8((__m128i)(__v16qs){+1, +2, +3, +4, +5, +6, +7, +8, -9, -10, -11, -12, -13, -14, -15, -16}, 0x00FF, (__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), +1, +2, +3, +4, +5, +6, +7, +8, -9, -10, -11, -12, -13, -14, -15, -16)); + __m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epi8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1253,6 +1294,9 @@ __m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epi8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_max_epi8(0x0000FFFF, (__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: 
test_mm256_mask_max_epi8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1261,6 +1305,9 @@ __m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256 // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qi(_mm256_mask_max_epi8((__m256i)(__v32qs){+1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32}, 0x0000FFFF, (__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32)); + __m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1269,6 +1316,9 @@ __m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hi(_mm_maskz_max_epi16(0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 5, 0, 0, 0, 0)); + __m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epi16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1277,6 +1327,9 @@ __m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: 
select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hi(_mm_mask_max_epi16((__m128i)(__v8hi){1, 1, 1, 1, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 5, 0, 0, 0, 0)); + __m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epi16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1285,6 +1338,9 @@ __m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_max_epi16(0x00FF, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epi16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1293,6 +1349,9 @@ __m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m25 // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hi(_mm256_mask_max_epi16((__m256i)(__v16hi){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), +1, +2, +3, +4, +5, +6, +7, +8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: 
test_mm_maskz_max_epu8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1301,6 +1360,9 @@ __m128i test_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qu(_mm_maskz_max_epu8(0x00FF, (__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epu8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1309,6 +1371,9 @@ __m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qu(_mm_mask_max_epu8((__m128i)(__v16qu){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epu8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1317,6 +1382,9 @@ __m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qu(_mm256_maskz_max_epu8(0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, 
(__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epu8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1325,6 +1393,9 @@ __m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256 // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qu(_mm256_mask_max_epu8((__m256i)(__v32qu){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epu16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1333,6 +1404,9 @@ __m128i test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hu(_mm_maskz_max_epu16(0x0F, (__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 3, 4, 5, 7, 0, 0, 0, 0)); + __m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_max_epu16 // CHECK: [[RES:%.*]] = 
call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1341,6 +1415,9 @@ __m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hu(_mm_mask_max_epu16((__m128i)(__v8hu){1, 1, 1, 1, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 3, 4, 5, 7, 0, 0, 0, 0)); + __m256i test_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_max_epu16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1349,6 +1426,9 @@ __m256i test_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hu(_mm256_maskz_max_epu16(0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_max_epu16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1357,6 +1437,9 @@ __m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m25 // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hu(_mm256_mask_max_epu16((__m256i)(__v16hu){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i 
test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1365,6 +1448,9 @@ __m128i test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epi8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qi(_mm_maskz_min_epi8(0x00FF, (__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), -1, -2, -3, -4, -5, -6, -7, -8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1373,6 +1459,9 @@ __m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qi(_mm_mask_min_epi8((__m128i)(__v16qs){+1, +2, +3, +4, +5, +6, +7, +8, -9, -10, -11, -12, -13, -14, -15, -16}, 0x00FF, (__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16)); + __m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epi8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1381,6 +1470,9 @@ __m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epi8(__M,__A,__B); } + 
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_min_epi8(0x0000FFFF, (__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epi8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1389,6 +1481,9 @@ __m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256 // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epi8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qi(_mm256_mask_min_epi8((__m256i)(__v32qs){+1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32}, 0x0000FFFF, (__m256i)(__v32qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m256i)(__v32qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32)); + __m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epi16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1397,6 +1492,9 @@ __m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, 
__m128i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hi(_mm_maskz_min_epi16(0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epi16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1405,6 +1503,9 @@ __m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hi(_mm_mask_min_epi16((__m128i)(__v8hi){1, 1, 1, 1, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 4, 0, 0, 0, 0)); + __m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epi16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1413,6 +1514,9 @@ __m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epi16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_min_epi16(0x00FF, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epi16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1421,6 +1525,9 @@ __m256i test_mm256_mask_min_epi16(__m256i __W, 
__mmask16 __M, __m256i __A, __m25 // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epi16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hi(_mm256_mask_min_epi16((__m256i)(__v16hi){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}), -1, -2, -3, -4, -5, -6, -7, -8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epu8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1429,6 +1536,9 @@ __m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qu(_mm_maskz_min_epu8(0x00FF, (__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epu8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) @@ -1437,6 +1547,9 @@ __m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i _ // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16qu(_mm_mask_min_epu8((__m128i)(__v16qu){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i 
test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epu8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1445,6 +1558,9 @@ __m256i test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epu8(__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qu(_mm256_maskz_min_epu8(0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epu8 // CHECK: [[RES:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1453,6 +1569,9 @@ __m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256 // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epu8(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v32qu(_mm256_mask_min_epu8((__m256i)(__v32qu){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_min_epu16 // 
CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1461,6 +1580,9 @@ __m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hu(_mm_maskz_min_epu16(0x0F, (__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 1, 3, 5, 6, 0, 0, 0, 0)); + __m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_min_epu16 // CHECK: [[RES:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) @@ -1469,6 +1591,9 @@ __m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i _ // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v8hu(_mm_mask_min_epu16((__m128i)(__v8hu){1, 1, 1, 1, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 1, 3, 5, 6, 0, 0, 0, 0)); + __m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_min_epu16 // CHECK: [[RES:%.*]] = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1477,6 +1602,9 @@ __m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epu16(__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hu(_mm256_maskz_min_epu16(0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_min_epu16 // CHECK: [[RES:%.*]] = call <16 x 
i16> @llvm.umin.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) @@ -1485,6 +1613,9 @@ __m256i test_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m25 // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epu16(__W,__M,__A,__B); } + +TEST_CONSTEXPR(match_v16hu(_mm256_mask_min_epu16((__m256i)(__v16hu){1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shuffle_epi8 // CHECK: @llvm.x86.ssse3.pshuf.b @@ -2094,6 +2225,7 @@ __m256i test_mm256_sllv_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psllv.w.256( return _mm256_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_sllv_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), -64, 0, -272, 560, -1152, -2368, 0, -9984, 0, 0, 20480, 0, -32768, 0, 0, 0)); __m256i test_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_sllv_epi16 @@ -2101,6 +2233,7 @@ __m256i test_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_sllv_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, -272, 999, -1152, 999, 0, 999, 0, 0, 20480, 
0, -32768, 0, 0, 0)); __m256i test_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_sllv_epi16 @@ -2108,12 +2241,14 @@ __m256i test_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_sllv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_sllv_epi16(0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, -272, 0, -1152, 0, 0, 0, 0, 0, 20480, 0, -32768, 0, 0, 0)); __m128i test_mm_sllv_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_sllv_epi16 // CHECK: @llvm.x86.avx512.psllv.w.128( return _mm_sllv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_sllv_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 0, 0, -640, 0, 0, 5888)); __m128i test_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sllv_epi16 @@ -2121,6 +2256,7 @@ __m128i test_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_sllv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_sllv_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 999, 999, -640, 999, 999, 5888)); __m128i test_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_sllv_epi16 @@ -2128,6 +2264,7 @@ __m128i test_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_sllv_epi16(__U, __A, __B); } 
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_sllv_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 32, 68, 0, 0, -640, 0, 0, 5888)); __m128i test_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sll_epi16 @@ -2191,6 +2328,7 @@ __m256i test_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_slli_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_slli_epi16((__m256i)(__v16hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0xAAAA, (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 20), 100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); __m256i test_mm256_mask_slli_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_slli_epi16_2 @@ -2205,6 +2343,11 @@ __m256i test_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_slli_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x00ffcc71, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0xffff, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x7, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15}, 1), 0x1fe, 0x2, 0x4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_slli_epi16((__mmask16)0x71, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x1fe, 0, 0, 0, 0x8, 0xa, 0xc, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_maskz_slli_epi16_2(__mmask16 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_slli_epi16_2 @@ -2218,6 +2361,7 @@ __m256i test_mm256_srlv_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psrlv.w.256( return _mm256_srlv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_srlv_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 32752, 0, 8187, 2, 2046, 1023, 0, 255, 0, 0, 0, 0, 0, 0, 1, 0)); __m256i test_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_srlv_epi16 @@ -2225,6 +2369,7 @@ __m256i test_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srlv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srlv_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, 8187, 999, 2046, 999, 0, 999, 0, 0, 0, 0, 0, 0, 1, 0)); __m256i test_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_srlv_epi16 @@ -2232,12 +2377,14 @@ __m256i test_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srlv_epi16(__U, __A, __B); } 
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srlv_epi16(0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, 8187, 0, 2046, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)); __m128i test_mm_srlv_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_srlv_epi16 // CHECK: @llvm.x86.avx512.psrlv.w.128( return _mm_srlv_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_srlv_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, 0, 2047, 0, 0, 0)); __m128i test_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srlv_epi16 @@ -2245,6 +2392,7 @@ __m128i test_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_srlv_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_srlv_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 999, 999, 2047, 999, 999, 0)); __m128i test_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_srlv_epi16 @@ -2252,6 +2400,7 @@ __m128i test_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_srlv_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_srlv_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, 0, 2047, 0, 0, 0)); __m128i test_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srl_epi16 @@ -2315,6 +2464,8 @@ __m256i test_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK: select 
<16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srli_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srli_epi16((__m256i)(__v16hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0xAAAA, (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 20), 100, 0, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srli_epi16((__m256i)(__v16hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0xAAAA, (__m256i)(__v16hi){0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480}, 5), 100, 1, 102, 3, 104, 5, 106, 7, 108, 9, 110, 11, 112, 13, 114, 15)); __m256i test_mm256_mask_srli_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, int __B) { // CHECK-LABEL: test_mm256_mask_srli_epi16_2 @@ -2329,6 +2480,7 @@ __m256i test_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srli_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srli_epi16((__mmask16)0x71, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_maskz_srli_epi16_2(__mmask16 __U, __m256i __A, int __B) { // CHECK-LABEL: test_mm256_maskz_srli_epi16_2 @@ -2342,6 +2494,7 @@ __m256i test_mm256_srav_epi16(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.psrav.w.256( return _mm256_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_srav_epi16((__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), -16, 0, -5, 2, -2, -1, 0, -1, -1, -1, 0, 0, 0, -1, -1, -1)); __m256i test_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: 
test_mm256_mask_srav_epi16 @@ -2349,6 +2502,7 @@ __m256i test_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srav_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srav_epi16((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 999, 0, -5, 999, -2, 999, 0, 999, -1, -1, 0, 0, 0, -1, -1, -1)); __m256i test_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_srav_epi16 @@ -2356,12 +2510,14 @@ __m256i test_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srav_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srav_epi16(0xFF56, (__m256i)(__v16hi){ -32, 33, -34, 35, -36, -37, 38, -39, -40, -41, 42, 43, 44, -45, -46, -47}, (__m256i)(__v16hi){ 1, -2, 3, 4, 5, 6, -7, 8, -9, -10, 11, -12, 13, -14, 15, 16}), 0, 0, -5, 0, -2, 0, 0, 0, -1, -1, 0, 0, 0, -1, -1, -1)); __m128i test_mm_srav_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_srav_epi16 // CHECK: @llvm.x86.avx512.psrav.w.128( return _mm_srav_epi16(__A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_srav_epi16((__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, -1, -1, -1, 0, 0)); __m128i test_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_srav_epi16 @@ -2369,6 +2525,7 @@ __m128i test_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_srav_epi16(__W, __U, __A, __B); } 
+TEST_CONSTEXPR(match_v8hi(_mm_mask_srav_epi16((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 999, 999, -1, 999, 999, 0)); __m128i test_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_srav_epi16 @@ -2376,6 +2533,7 @@ __m128i test_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_srav_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_srav_epi16(0x93, (__m128i)(__v8hi){ 16, 17, 18, -19, -20, -21, 22, 23}, (__m128i)(__v8hi){ 1, 2, -3, -4, 5, -6, -7, 8}), 8, 4, 0, 0, -1, 0, 0, 0)); __m128i test_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_sra_epi16 @@ -2439,6 +2597,7 @@ __m256i test_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_srai_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_srai_epi16((__m256i)(__v16hi){100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115}, (__mmask16)0xAAAA, (__m256i)(__v16hi){0, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 20), 100, 0Xffff, 102, 0, 104, 0, 106, 0, 108, 0, 110, 0, 112, 0, 114, 0)); __m256i test_mm256_mask_srai_epi16_2(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_mask_srai_epi16_2 @@ -2453,6 +2612,7 @@ __m256i test_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_srai_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_srai_epi16((__mmask16)0x71, (__m256i)(__v16hi){0xff, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 1), 0x7f, 0, 0, 0, 0x2, 0x2, 0x3, 0, 0, 0, 0, 0, 0, 0, 0, 
0)); __m256i test_mm256_maskz_srai_epi16_2(__mmask16 __U, __m256i __A, unsigned int __B) { // CHECK-LABEL: test_mm256_maskz_srai_epi16_2 diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c index faa3b54624a7..6b76da3e8bfc 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c +++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c @@ -7,31 +7,45 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64 -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64 -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// 
RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> +#include "builtin_test_helpers.h" short test_mm_reduce_add_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_add_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_add_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36); short test_mm_reduce_mul_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_mul_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_mul_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72); short test_mm_reduce_or_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_or_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_or_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15); short test_mm_reduce_and_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_and_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_and_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1); short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_add_epi16 @@ -39,6 +53,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_add_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_add_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 26); 
+TEST_CONSTEXPR(_mm_mask_reduce_add_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 10); short test_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_mul_epi16 @@ -46,6 +62,8 @@ short test_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_mul_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 12); +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 6); short test_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_and_epi16 @@ -53,6 +71,8 @@ short test_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}} return _mm_mask_reduce_and_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_and_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,3,5,7,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_and_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,3,5,7,0,0,0,0}) == 1); short test_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_or_epi16 @@ -60,30 +80,36 @@ short test_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_or_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_or_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_or_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15); short test_mm256_reduce_add_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_add_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_add_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_add_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 120); 
short test_mm256_reduce_mul_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_mul_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_mul_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_mul_epi16((__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 7776); short test_mm256_reduce_or_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_or_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_or_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_or_epi16((__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 255); short test_mm256_reduce_and_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_and_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_and_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_and_epi16((__m256i)(__v16hi){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}) == 1); short test_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_add_epi16 @@ -91,6 +117,8 @@ short test_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_add_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 84); +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 36); short test_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_mul_epi16 @@ -98,6 +126,8 @@ short test_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_mul_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi16((__mmask16)0b1111111100000000, 
(__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 108); +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 72); short test_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_and_epi16 @@ -105,6 +135,8 @@ short test_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_and_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 1); short test_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_or_epi16 @@ -112,30 +144,36 @@ short test_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_or_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 255); signed char test_mm_reduce_add_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_add_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_add_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_add_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 120); signed char test_mm_reduce_mul_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_mul_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_mul_epi8(__W); } 
+TEST_CONSTEXPR(_mm_reduce_mul_epi8((__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 32); signed char test_mm_reduce_and_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_and_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_and_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_and_epi8((__m128i)(__v16qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}) == 1); signed char test_mm_reduce_or_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_or_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_or_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_or_epi8((__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 127); signed char test_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_add_epi8 @@ -143,6 +181,8 @@ signed char test_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_add_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_add_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 84); +TEST_CONSTEXPR(_mm_mask_reduce_add_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 36); signed char test_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_mul_epi8 @@ -150,6 +190,8 @@ signed char test_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_mul_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 4); +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 8); signed char test_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_and_epi8 @@ 
-157,6 +199,8 @@ signed char test_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_and_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_and_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_and_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 1); signed char test_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_or_epi8 @@ -164,30 +208,36 @@ signed char test_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_or_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_or_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_or_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 127); signed char test_mm256_reduce_add_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_add_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_add_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_add_epi8((__m256i)(__v32qs){0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 112); signed char test_mm256_reduce_mul_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_mul_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_mul_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_mul_epi8((__m256i)(__v32qs){1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 16); signed char test_mm256_reduce_and_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_and_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_and_epi8(__W); } 
+TEST_CONSTEXPR(_mm256_reduce_and_epi8((__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63}) == 1); signed char test_mm256_reduce_or_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_or_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_or_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_or_epi8((__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 127); signed char test_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_add_epi8 @@ -195,6 +245,8 @@ signed char test_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_add_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 56); +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){8,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 64); signed char test_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_mul_epi8 @@ -202,6 +254,8 @@ signed char test_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_mul_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 4); +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){4,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 16); signed char test_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: 
test_mm256_mask_reduce_and_epi8 @@ -209,6 +263,8 @@ signed char test_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_and_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 1); signed char test_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_or_epi8 @@ -216,30 +272,36 @@ signed char test_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_or_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 127); short test_mm_reduce_max_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_max_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epi16((__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 4); short test_mm_reduce_min_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_min_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epi16((__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -4); unsigned short test_mm_reduce_max_epu16(__m128i __W){ // CHECK-LABEL: 
test_mm_reduce_max_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_max_epu16(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epu16((__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 8); unsigned short test_mm_reduce_min_epu16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_min_epu16(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epu16((__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 1); short test_mm_mask_reduce_max_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epi16 @@ -247,6 +309,8 @@ short test_mm_mask_reduce_max_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_max_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 4); +TEST_CONSTEXPR(_mm_mask_reduce_max_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -1); short test_mm_mask_reduce_min_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epi16 @@ -254,6 +318,8 @@ short test_mm_mask_reduce_min_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_min_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 1); +TEST_CONSTEXPR(_mm_mask_reduce_min_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -4); unsigned short test_mm_mask_reduce_max_epu16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epu16 @@ -261,6 +327,8 @@ unsigned short test_mm_mask_reduce_max_epu16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_max_epu16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epu16((__mmask8)0b11110000, 
(__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm_mask_reduce_max_epu16((__mmask8)0b00001111, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 4); unsigned short test_mm_mask_reduce_min_epu16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epu16 @@ -268,30 +336,36 @@ unsigned short test_mm_mask_reduce_min_epu16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_min_epu16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epu16((__mmask8)0b11110000, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 5); +TEST_CONSTEXPR(_mm_mask_reduce_min_epu16((__mmask8)0b00001111, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 1); short test_mm256_reduce_max_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_max_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epi16((__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); short test_mm256_reduce_min_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_min_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epi16((__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned short test_mm256_reduce_max_epu16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_max_epu16(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epu16((__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); unsigned short test_mm256_reduce_min_epu16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_min_epu16(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epu16((__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); short 
test_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epi16 @@ -299,6 +373,8 @@ short test_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_max_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi16((__mmask16){0b1111111100000000}, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi16((__mmask16){0b0000000011111111}, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -1); short test_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epi16 @@ -306,6 +382,8 @@ short test_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_min_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi16((__mmask16){0b1111111100000000}, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 1); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi16((__mmask16){0b0000000011111111}, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned short test_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epu16 @@ -313,6 +391,8 @@ unsigned short test_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_max_epu16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu16((__mmask16){0b1111111100000000}, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu16((__mmask16){0b0000000011111111}, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 8); unsigned short test_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: 
test_mm256_mask_reduce_min_epu16 @@ -320,30 +400,36 @@ unsigned short test_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_min_epu16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu16((__mmask16){0b1111111100000000}, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 9); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu16((__mmask16){0b0000000011111111}, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm_reduce_max_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_max_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epi8((__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); signed char test_mm_reduce_min_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_min_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epi8((__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned char test_mm_reduce_max_epu8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_max_epu8(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); unsigned char test_mm_reduce_min_epu8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_min_epu8(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epi8 @@ -351,6 +437,8 @@ signed char test_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 
@llvm.vector.reduce.smax.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_max_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm_mask_reduce_max_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -1); signed char test_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epi8 @@ -358,6 +446,8 @@ signed char test_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_min_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 1); +TEST_CONSTEXPR(_mm_mask_reduce_min_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned char test_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epu8 @@ -365,6 +455,8 @@ unsigned char test_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_max_epu8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epu8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm_mask_reduce_max_epu8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 8); unsigned char test_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epu8 @@ -372,30 +464,36 @@ unsigned char test_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_min_epu8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epu8((__mmask16)0b1111111100000000, 
(__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 9); +TEST_CONSTEXPR(_mm_mask_reduce_min_epu8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm256_reduce_max_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_max_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epi8((__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); signed char test_mm256_reduce_min_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_min_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epi8((__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -16); unsigned char test_mm256_reduce_max_epu8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_max_epu8(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 32); unsigned char test_mm256_reduce_min_epu8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_min_epu8(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 1); signed char test_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epi8 @@ -403,6 +501,8 @@ signed char test_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %{{.*}}) return 
_mm256_mask_reduce_max_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -1); signed char test_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epi8 @@ -410,6 +510,8 @@ signed char test_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_min_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -16); unsigned char test_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epu8 @@ -417,6 +519,8 @@ unsigned char test_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_max_epu8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 32); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 16); unsigned 
char test_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epu8 @@ -424,3 +528,5 @@ unsigned char test_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_min_epu8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 17); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 1); diff --git a/clang/test/CodeGen/X86/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c index 939fd6460e6a..1619305dd521 100644 --- a/clang/test/CodeGen/X86/avx512vlcd-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlcd-builtins.c @@ -2,9 +2,14 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm 
-o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512cd -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) { // CHECK-LABEL: test_mm_broadcastmb_epi64 @@ -136,80 +141,132 @@ __m256i test_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { __m128i test_mm_lzcnt_epi32(__m128i __A) { // CHECK-LABEL: test_mm_lzcnt_epi32 - // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i32> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_lzcnt_epi32(__A); } +TEST_CONSTEXPR(match_v4si(_mm_lzcnt_epi32((__m128i)(__v4si){8, 16, 32, 64}), 28, 27, 26, 25)); +TEST_CONSTEXPR(match_v4si(_mm_lzcnt_epi32((__m128i)(__v4si){0, 0, 0, 0}), 32, 32, 32, 32)); + __m128i test_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_lzcnt_epi32 - // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i32> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_lzcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_lzcnt_epi32(_mm_set1_epi32(32), /*0000 
0101=*/0x5, (__m128i)(__v4si){8, 16, 32, 64}), 28, 32, 26, 32)); + __m128i test_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_lzcnt_epi32 - // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i32> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_lzcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_lzcnt_epi32(/*0000 0101=*/0x5, (__m128i)(__v4si){8, 16, 32, 64}), 28, 0, 26, 0)); + __m256i test_mm256_lzcnt_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_lzcnt_epi32 - // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i32> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_lzcnt_epi32(__A); } +TEST_CONSTEXPR(match_v8si(_mm256_lzcnt_epi32((__m256i)(__v8si){1, 2, 4, 8, 16, 32, 64, 128}), 31, 30, 29, 28, 27, 26, 25, 24)); +TEST_CONSTEXPR(match_v8si(_mm256_lzcnt_epi32((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}), 32, 32, 32, 32, 32, 32, 32, 32)); + __m256i test_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_lzcnt_epi32 - // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i32> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_lzcnt_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_lzcnt_epi32(_mm256_set1_epi32(32), /*0101 0101=*/0x55, 
(__m256i)(__v8si){1, 2, 4, 8, 16, 32, 64, 128}), 31, 32, 29, 32, 27, 32, 25, 32)); + __m256i test_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_lzcnt_epi32 - // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <8 x i32> %{{.*}}, zeroinitializer + // CHECK: select <8 x i1> [[ISZERO]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_lzcnt_epi32(__U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_lzcnt_epi32(/*0101 0101=*/0x55, (__m256i)(__v8si){1, 2, 4, 8, 16, 32, 64, 128}), 31, 0, 29, 0, 27, 0, 25, 0)); + __m128i test_mm_lzcnt_epi64(__m128i __A) { // CHECK-LABEL: test_mm_lzcnt_epi64 - // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <2 x i64> %{{.*}}, zeroinitializer + // CHECK: select <2 x i1> [[ISZERO]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_lzcnt_epi64(__A); } +TEST_CONSTEXPR(match_v2di(_mm_lzcnt_epi64((__m128i)(__v2di){1, 2}), 63, 62)); +TEST_CONSTEXPR(match_v2di(_mm_lzcnt_epi64((__m128i)(__v2di){0, 0}), 64, 64)); + __m128i test_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_lzcnt_epi64 - // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <2 x i64> %{{.*}}, zeroinitializer + // CHECK: select <2 x i1> [[ISZERO]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_lzcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_lzcnt_epi64(_mm_set1_epi64x((long long)64), /*0000 0010=*/0x2, (__m128i)(__v2di){1, 
2}), 64, 62)); + __m128i test_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_lzcnt_epi64 - // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <2 x i64> %{{.*}}, zeroinitializer + // CHECK: select <2 x i1> [[ISZERO]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_lzcnt_epi64(__U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_lzcnt_epi64(/*0000 0010=*/0x2, (__m128i)(__v2di){1, 2}), 0, 62)); + __m256i test_mm256_lzcnt_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_lzcnt_epi64 - // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i64> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_lzcnt_epi64(__A); } +TEST_CONSTEXPR(match_v4di(_mm256_lzcnt_epi64((__m256i)(__v4di){1, 2, 4, 8}), 63, 62, 61, 60)); +TEST_CONSTEXPR(match_v4di(_mm256_lzcnt_epi64((__m256i)(__v4di){0, 0, 0, 0}), 64, 64, 64, 64)); + __m256i test_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_lzcnt_epi64 - // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i64> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i64> %{{.*}}, <4 x i64> %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_lzcnt_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_lzcnt_epi64(_mm256_set1_epi64x((long long) 64), /*0000 0110=*/0x6, (__m256i)(__v4di){1, 2, 4, 8}), 64, 62, 61, 64)); + __m256i 
test_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_lzcnt_epi64 - // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true) + // CHECK: [[ISZERO:%.+]] = icmp eq <4 x i64> %{{.*}}, zeroinitializer + // CHECK: select <4 x i1> [[ISZERO]], <4 x i64> %{{.*}}, <4 x i64> %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_lzcnt_epi64(__U, __A); } + +TEST_CONSTEXPR(match_v4di(_mm256_maskz_lzcnt_epi64(/*0000 0011*/0x3, (__m256i)(__v4di){1, 2, 4, 8}), 63, 62, 0, 0)); diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 66ba0c704668..802784472163 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -12,6 +17,7 @@ __m256i test_mm256_mullo_epi64 (__m256i __A, __m256i __B) { // CHECK: mul <4 x i64> return _mm256_mullo_epi64(__A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mullo_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, +12, +20)); __m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_mullo_epi64 @@ -19,6 +25,7 @@ __m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return (__m256i) _mm256_mask_mullo_epi64 ( __W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_mullo_epi64((__m256i)(__v4di){-100, +200, -300, +400}, 0x03, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, -300, +400)); __m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_mullo_epi64 @@ -26,12 +33,14 @@ __m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return (__m256i) _mm256_maskz_mullo_epi64 (__U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_mullo_epi64(0x03, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, 0, 0)); __m128i test_mm_mullo_epi64 (__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mullo_epi64 // CHECK: mul <2 x i64> return (__m128i) _mm_mullo_epi64(__A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mullo_epi64((__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, -8)); __m128i 
test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_mullo_epi64 @@ -39,6 +48,7 @@ __m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return (__m128i) _mm_mask_mullo_epi64 ( __W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_mullo_epi64((__m128i)(__v2di){-100, +200}, 0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, +200)); __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_mullo_epi64 @@ -46,6 +56,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return (__m128i) _mm_maskz_mullo_epi64 (__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_mullo_epi64(0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, 0)); __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: test_mm256_mask_andnot_pd @@ -429,6 +440,8 @@ __m128d test_mm_cvtepi64_pd(__m128i __A) { return _mm_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtepi64_pd((__m128i)(__v2di){-1, -1}), -1.0, -1.0)); + __m128d test_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi64_pd // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> @@ -436,6 +449,8 @@ __m128d test_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return _mm_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepi64_pd((__m128d){-777.0, -777.0}, /*01=*/0x1, (__m128i)(__v2di){-1, -1}), -1.0, -777.0)); + __m128d test_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepi64_pd // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> @@ -443,12 +458,16 @@ __m128d test_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { return 
_mm_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepi64_pd(/*01=*/0x1, (__m128i)(__v2di){-1, -1}), -1.0, 0.0)); + __m256d test_mm256_cvtepi64_pd(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> return _mm256_cvtepi64_pd(__A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtepi64_pd((__m256i)(__v4di){-1, -1, 2, 2}), -1.0, -1.0, 2.0, 2.0)); + __m256d test_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> @@ -456,6 +475,8 @@ __m256d test_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepi64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepi64_pd((__m256d){-777.0, -777.0, -777.0, -777.0}, /*1100*/0xc, (__m256i)(__v4di){-1, -1, 2, 2}), -777.0, -777.0, 2.0, 2.0)); + __m256d test_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi64_pd // CHECK: sitofp <4 x i64> %{{.*}} to <4 x double> @@ -463,6 +484,8 @@ __m256d test_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepi64_pd(__U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepi64_pd(/*1100*/0xc, (__m256i)(__v4di){-1, -1, 2, 2}), 0.0, 0.0, 2.0, 2.0)); + __m128 test_mm_cvtepi64_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepi64_ps // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.128 @@ -487,6 +510,8 @@ __m128 test_mm256_cvtepi64_ps(__m256i __A) { return _mm256_cvtepi64_ps(__A); } +TEST_CONSTEXPR(match_m128(_mm256_cvtepi64_ps((__m256i)(__v4di){-1, -1, 2, 2}), -1.0f, -1.0f, 2.0f, 2.0f)); + __m128 test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepi64_ps // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float> @@ -494,6 +519,8 @@ __m128 test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepi64_ps(__W, __U, __A); } 
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtepi64_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*1010=*/0xa, (__m256i)(__v4di){-1, -1, 2, 2}), -777.0f, -1.0f, -777.0f, 2.0f)); + __m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepi64_ps // CHECK: sitofp <4 x i64> %{{.*}} to <4 x float> @@ -501,6 +528,8 @@ __m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepi64_ps(__U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtepi64_ps(/*1010=*/0xa, (__m256i)(__v4di){-1, -1, 2, 2}), 0.0f, -1.0f, 0.0f, 2.0f)); + __m128i test_mm_cvttpd_epi64(__m128d __A) { // CHECK-LABEL: test_mm_cvttpd_epi64 // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.128 @@ -651,6 +680,8 @@ __m128d test_mm_cvtepu64_pd(__m128i __A) { return _mm_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtepu64_pd((__m128i)(__v2du){1, 1}), 1.0, 1.0)); + __m128d test_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepu64_pd // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> @@ -658,6 +689,8 @@ __m128d test_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return _mm_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_mask_cvtepu64_pd((__m128d){-777.0, -777.0}, /*01=*/0x1, (__m128i)(__v2du){1, 1}), 1.0, -777.0)); + __m128d test_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_cvtepu64_pd // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> @@ -665,12 +698,16 @@ __m128d test_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { return _mm_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_cvtepu64_pd(/*01=*/0x1, (__m128i)(__v2du){1, 1}), 1.0, 0.0)); + __m256d test_mm256_cvtepu64_pd(__m256i __A) { // CHECK-LABEL: test_mm256_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> return _mm256_cvtepu64_pd(__A); } +TEST_CONSTEXPR(match_m256d(_mm256_cvtepu64_pd((__m256i)(__v4du){1, 1, 2, 2}), 1.0, 1.0, 2.0, 2.0)); 
+ __m256d test_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> @@ -678,6 +715,8 @@ __m256d test_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepu64_pd(__W, __U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_cvtepu64_pd((__m256d){-777.0, -777.0, -777.0, -777.0}, /*1100*/0xc, (__m256i)(__v4du){1, 1, 2, 2}), -777.0, -777.0, 2.0, 2.0)); + __m256d test_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu64_pd // CHECK: uitofp <4 x i64> %{{.*}} to <4 x double> @@ -685,6 +724,8 @@ __m256d test_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { return _mm256_maskz_cvtepu64_pd(__U, __A); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_cvtepu64_pd(/*1100*/0xc, (__m256i)(__v4du){1, 1, 2, 2}), 0.0, 0.0, 2.0, 2.0)); + __m128 test_mm_cvtepu64_ps(__m128i __A) { // CHECK-LABEL: test_mm_cvtepu64_ps // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.128 @@ -709,6 +750,8 @@ __m128 test_mm256_cvtepu64_ps(__m256i __A) { return _mm256_cvtepu64_ps(__A); } +TEST_CONSTEXPR(match_m128(_mm256_cvtepu64_ps((__m256i)(__v4du){1, 1, 2, 2}), 1.0f, 1.0f, 2.0f, 2.0f)); + __m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_cvtepu64_ps // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float> @@ -716,6 +759,8 @@ __m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return _mm256_mask_cvtepu64_ps(__W, __U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_mask_cvtepu64_ps((__m128){-777.0f, -777.0f, -777.0f, -777.0f}, /*1010=*/0xa, (__m256i)(__v4du){1, 1, 2, 2}), -777.0f, 1.0f, -777.0f, 2.0f)); + __m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_cvtepu64_ps // CHECK: uitofp <4 x i64> %{{.*}} to <4 x float> @@ -723,6 +768,8 @@ __m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { return 
_mm256_maskz_cvtepu64_ps(__U, __A); } +TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtepu64_ps(/*1010=*/0xa, (__m256i)(__v4du){1, 1, 2, 2}), 0.0f, 1.0f, 0.0f, 2.0f)); + __m128d test_mm_range_pd(__m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_range_pd // CHECK: @llvm.x86.avx512.mask.range.pd.128 diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c index 4e8bffc12cf4..fd6ea8fe6056 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -3,7 +3,14 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> +#include 
"builtin_test_helpers.h" _Float16 test_mm_cvtsh_h(__m128h __A) { // CHECK-LABEL: test_mm_cvtsh_h @@ -43,6 +50,8 @@ __m128h test_mm_set1_ph(_Float16 h) { return _mm_set1_ph(h); } +TEST_CONSTEXPR(match_m128h(_mm_set1_ph(-777.0), -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0)); + __m256h test_mm256_set1_ph(_Float16 h) { // CHECK-LABEL: test_mm256_set1_ph // CHECK: insertelement <16 x half> {{.*}}, i32 0 @@ -64,6 +73,8 @@ __m256h test_mm256_set1_ph(_Float16 h) { return _mm256_set1_ph(h); } +TEST_CONSTEXPR(match_m256h(_mm256_set1_ph(-777.0), -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0, -777.0)); + __m128h test_mm_set1_pch(_Float16 _Complex h) { // CHECK-LABEL: test_mm_set1_pch // CHECK: insertelement <4 x float> {{.*}}, i32 0 @@ -401,12 +412,14 @@ __m128h test_mm_abs_ph(__m128h a) { // CHECK: and <4 x i32> return _mm_abs_ph(a); } +TEST_CONSTEXPR(match_m128h(_mm_abs_ph((__m128h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)); __m256h test_mm256_abs_ph(__m256h a) { // CHECK-LABEL: test_mm256_abs_ph // CHECK: and <8 x i32> return _mm256_abs_ph(a); } +TEST_CONSTEXPR(match_m256h(_mm256_abs_ph((__m256h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, 16.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0)); __m256h test_mm256_conj_pch(__m256h __A) { // CHECK-LABEL: test_mm256_conj_pch @@ -1790,36 +1803,48 @@ __m128h test_mm_cvtepi16_ph(__m128i A) { return _mm_cvtepi16_ph(A); } +TEST_CONSTEXPR(match_m128h(_mm_cvtepi16_ph((__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m128h test_mm_mask_cvtepi16_ph(__m128h A, __mmask8 B, __m128i C) { // CHECK-LABEL: test_mm_mask_cvtepi16_ph // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half> return _mm_mask_cvtepi16_ph(A, B, C); } 
+TEST_CONSTEXPR(match_m128h(_mm_mask_cvtepi16_ph(_mm_set1_ph(-777.0), /*1001 0011=*/0x93, (__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, -777.0, -777.0, 4.0, -777.0, -777.0, 8.0)); + __m128h test_mm_maskz_cvtepi16_ph(__mmask8 A, __m128i B) { // CHECK-LABEL: test_mm_maskz_cvtepi16_ph // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half> return _mm_maskz_cvtepi16_ph(A, B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_cvtepi16_ph(/*1001 0011=*/0x93, (__m128i)(__v8hi){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0)); + __m256h test_mm256_cvtepi16_ph(__m256i A) { // CHECK-LABEL: test_mm256_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_cvtepi16_ph(A); } +TEST_CONSTEXPR(match_m256h(_mm256_cvtepi16_ph((__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 8.0, 8.0, -16.0, -16.0, 32.0, 32.0, -64.0, -64.0, 128.0, 128.0)); + __m256h test_mm256_mask_cvtepi16_ph(__m256h A, __mmask16 B, __m256i C) { // CHECK-LABEL: test_mm256_mask_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_mask_cvtepi16_ph(A, B, C); } +TEST_CONSTEXPR(match_m256h(_mm256_mask_cvtepi16_ph(_mm256_set1_ph(-777.0), /*1101 0101 1101 1100=*/0xd5dc, (__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), -777.0, -777.0, 2.0, 2.0, -4.0, -777.0, 8.0, 8.0, -16.0, -777.0, 32.0, -777.0, -64.0, -777.0, 128.0, 128.0)); + __m256h test_mm256_maskz_cvtepi16_ph(__mmask16 A, __m256i B) { // CHECK-LABEL: test_mm256_maskz_cvtepi16_ph // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half> return _mm256_maskz_cvtepi16_ph(A, B); } +TEST_CONSTEXPR(match_m256h(_mm256_maskz_cvtepi16_ph(/*1101 0101 1101 1100=*/0xd5dc, (__m256i)(__v16hi){-1, -1, 2, 2, -4, -4, 8, 8, -16, -16, 32, 32, -64, -64, 128, 128}), 0.0, 0.0, 2.0, 2.0, -4.0, 0.0, 8.0, 8.0, -16.0, 0.0, 32.0, 0.0, -64.0, 0.0, 128.0, 128.0)); + __m128i test_mm_cvtph_epu16(__m128h 
A) { // CHECK-LABEL: test_mm_cvtph_epu16 // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128 @@ -1898,18 +1923,24 @@ __m128h test_mm_cvtepu16_ph(__m128i A) { return _mm_cvtepu16_ph(A); } +TEST_CONSTEXPR(match_m128h(_mm_cvtepu16_ph((__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0)); + __m128h test_mm_mask_cvtepu16_ph(__m128h A, __mmask8 B, __m128i C) { // CHECK-LABEL: test_mm_mask_cvtepu16_ph // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half> return _mm_mask_cvtepu16_ph(A, B, C); } +TEST_CONSTEXPR(match_m128h(_mm_mask_cvtepu16_ph(_mm_set1_ph(-777.0), /*1001 0011=*/0x93, (__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, -777.0, -777.0, 4.0, -777.0, -777.0, 8.0)); + __m128h test_mm_maskz_cvtepu16_ph(__mmask8 A, __m128i B) { // CHECK-LABEL: test_mm_maskz_cvtepu16_ph // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half> return _mm_maskz_cvtepu16_ph(A, B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_cvtepu16_ph(/*1001 0011=*/0x93, (__m128i)(__v8hu){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0)); + __m256h test_mm256_cvtepu16_ph(__m256i A) { // CHECK-LABEL: test_mm256_cvtepu16_ph // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half> diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index 7259325dd2e3..e1e8578ea414 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vl -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { // CHECK-LABEL: test_mm_mask_compress_epi16 @@ -179,6 +180,7 @@ __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> 
%{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shldi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 999, 12384898975268864LL)); __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi64 @@ -186,12 +188,14 @@ __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 0, 12384898975268864LL)); __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi64 // CHECK: call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)) return _mm256_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shldi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 11258999068426240LL, 12384898975268864LL)); __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi64 @@ -199,6 +203,7 @@ __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shldi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, -160)); __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi64 
@@ -206,12 +211,14 @@ __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shldi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, -160)); __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi64 // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shldi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -97, -160)); __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi32 @@ -219,6 +226,7 @@ __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shldi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 9216, -9217, 10240, 999, -11264, -11776)); __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi32 @@ -226,12 +234,14 @@ __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 9216, -9217, 10240, 0, -11264, -11776)); __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: 
test_mm256_shldi_epi32 // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)) return _mm256_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v8si(_mm256_shldi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), -8192, 9215, 9216, -9217, 10240, 10752, -11264, -11776)); __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi32 @@ -239,6 +249,7 @@ __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shldi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 999, 11263, -11264)); __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi32 @@ -246,12 +257,14 @@ __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shldi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 0, 11263, -11264)); __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shldi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 9216, 11263, -11264)); __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi16 @@ -259,6 +272,7 @@ __m256i 
test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 999, 999, 999, 999, -27648, 999, -24577, 25599, 999, 22528, 999, 21503, 999, 999, 999)); __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi16 @@ -266,12 +280,14 @@ __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 0, 0, 0, 0, -27648, 0, -24577, 25599, 0, 22528, 0, 21503, 0, 0, 0)); __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi16 // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shldi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 32767, 30720, -28673, 29695, -27648, 27647, -24577, 25599, 23552, 22528, 21504, 21503, 20479, 19455, 18431)); __m128i test_mm_mask_shldi_epi16(__m128i __S, 
__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi16 @@ -279,6 +295,7 @@ __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shldi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, -4608, -4864, 5375, 999, 999, 6143)); __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi16 @@ -286,12 +303,14 @@ __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, -4608, -4864, 5375, 0, 0, 6143)); __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shldi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 4351, 4607, -4608, -4864, 5375, -5376, -5632, 6143)); __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi64 @@ -299,6 +318,7 @@ __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47); } 
+TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 999, 65536)); __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi64 @@ -306,12 +326,14 @@ __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 0, 65536)); __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi64 // CHECK: call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31) return _mm256_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shrdi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 49152, 65536)); __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi64 @@ -319,6 +341,7 @@ __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shrdi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, 1729382256910270463LL)); __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi64 @@ -326,12 +349,14 @@ __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } 
+TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, 1729382256910270463LL)); __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi64 // CHECK: call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shrdi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -1, 1729382256910270463LL)); __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi32 @@ -339,6 +364,7 @@ __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 25165824, -25165825, 41943040, 999, 67108863, 75497471)); __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi32 @@ -346,12 +372,14 @@ __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 25165824, -25165825, 41943040, 0, 67108863, 75497471)); __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi32 // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31) return _mm256_shrdi_epi32(__A, __B, 
31); } +TEST_CONSTEXPR(match_v8si(_mm256_shrdi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 16777215, -16777216, 25165824, -25165825, 41943040, 50331648, 67108863, 75497471)); __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi32 @@ -359,6 +387,7 @@ __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shrdi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 999, -12582912, 20971519)); __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi32 @@ -366,12 +395,14 @@ __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 0, -12582912, 20971519)); __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi32 // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shrdi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 8388608, -12582912, 20971519)); __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi16 @@ -379,6 +410,7 @@ __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> 
%{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 999, 999, 999, 999, 384, 999, -512, -513, 999, 767, 999, -769, 999, 999, 999)); __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi16 @@ -386,12 +418,14 @@ __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 0, 0, 0, 0, 384, 0, -512, -513, 0, 767, 0, -769, 0, 0, 0)); __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi16 // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shrdi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, -65, 255, -256, -257, 384, -385, -512, -513, 703, 767, 831, -769, -833, -897, -961)); __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi16 @@ -399,6 +433,7 @@ __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, 
__m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, 1023, 1279, -1280, 999, 999, -2048)); __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi16 @@ -406,12 +441,14 @@ __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, 1023, 1279, -1280, 0, 0, -2048)); __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi16 // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shrdi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), -256, -512, 1023, 1279, -1280, 1791, 2047, -2048)); __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi64 @@ -419,6 +456,7 @@ __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shldv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, 0x9, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -8070450532247928833LL, 9, 10, -22)); __m256i 
test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi64 @@ -426,12 +464,14 @@ __m256i test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldv_epi64(0x9, (__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -8070450532247928833LL, 0, 0, -22)); __m256i test_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_shldv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -8070450532247928833LL, 4611686018427387903LL, 43, -22)); __m128i test_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi64 @@ -439,6 +479,7 @@ __m128i test_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shldv_epi64((__m128i)(__v2di){ -4, -5}, 0x1, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -13, -5)); __m128i test_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi64 @@ -446,12 +487,14 @@ __m128i test_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shldv_epi64(0x1, (__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, 
(__m128i)(__v2di){ 2, 1}), -13, 0)); __m128i test_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_shldv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_shldv_epi64((__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -13, -10)); __m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi32 @@ -459,6 +502,7 @@ __m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shldv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, 0xDF, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, -21, -85, -4)); __m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi32 @@ -466,12 +510,14 @@ __m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldv_epi32(0xDF, (__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, 0, -85, -4)); __m256i test_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi32 // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_shldv_epi32(__S, __A, __B); } 
+TEST_CONSTEXPR(match_v8si(_mm256_shldv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 268435456, -2049, -1152, 1879048191, -320, -161, -85, -4)); __m128i test_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi32 @@ -479,6 +525,7 @@ __m128i test_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shldv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, 0xD, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, -9, -1073741825, -22)); __m128i test_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi32 @@ -486,12 +533,14 @@ __m128i test_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shldv_epi32(0xD, (__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, 0, -1073741825, -22)); __m128i test_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_shldv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_shldv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), -2147483648, -1, -1073741825, -22)); __m256i test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi16 @@ -499,6 +548,7 @@ __m256i 
test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, 0x12D6, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -1, -32768, 35, -561, 37, 27647, -19968, -40, 21503, -42, 43, 16384, 45, -46, 47)); __m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldv_epi16 @@ -506,12 +556,14 @@ __m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldv_epi16(0x12D6, (__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 0, -1, -32768, 0, -561, 0, 27647, -19968, 0, 21503, 0, 0, 16384, 0, 0, 0)); __m256i test_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldv_epi16 // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_shldv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -1, -32768, 287, -561, 1215, 27647, 
-19968, -9985, 21503, -2625, 24575, 16384, 360, -32765, 95)); __m128i test_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldv_epi16 @@ -519,6 +571,7 @@ __m128i test_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shldv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, 0x3A, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), -16, 8704, -18, -577, 335, 168, 22, -23)); __m128i test_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldv_epi16 @@ -526,12 +579,14 @@ __m128i test_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldv_epi16(0x3A, (__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 0, 8704, 0, -577, 335, 168, 0, 0)); __m128i test_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldv_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_shldv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_shldv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), -4096, 8704, -18432, -577, 335, 168, 91, -4)); __m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi64 @@ -539,6 +594,7 @@ __m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: 
select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, 0x9, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, 9, 10, 9223372036854775802LL)); __m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi64 @@ -546,12 +602,14 @@ __m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdv_epi64(0x9, (__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, 0, 0, 9223372036854775802LL)); __m256i test_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_shrdv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v4di(_mm256_shrdv_epi64((__m256i)(__v4di){ -8, 9, 10, -11}, (__m256i)(__v4di){ -1, -2, -3, 4}, (__m256i)(__v4di){ -4, -3, 2, 1}), -1, -16, 4611686018427387906LL, 9223372036854775802LL)); __m128i test_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi64 @@ -559,6 +617,7 @@ __m128i test_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdv_epi64(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shrdv_epi64((__m128i)(__v2di){ -4, -5}, 0x1, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, -5)); __m128i test_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi64 @@ -566,12 +625,14 @@ 
__m128i test_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdv_epi64(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdv_epi64(0x1, (__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, 0)); __m128i test_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_shrdv_epi64(__S, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_shrdv_epi64((__m128i)(__v2di){ -4, -5}, (__m128i)(__v2di){ -1, 2}, (__m128i)(__v2di){ 2, 1}), -1, 9223372036854775805LL)); __m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi32 @@ -579,6 +640,7 @@ __m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, 0xDF, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 268435455, -97, 1610612734, -21, 2147483642, -16)); __m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi32 @@ -586,12 +648,14 @@ __m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdv_epi32(0xDF, (__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 
268435455, -97, 1610612734, 0, 2147483642, -16)); __m256i test_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi32 // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_shrdv_epi32((__m256i)(__v8si){ 16, -17, -18, -19, -20, -21, -22, 23}, (__m256i)(__v8si){ 1, -2, 3, -4, 5, -6, -7, -8}, (__m256i)(__v8si){ -8, 7, 6, -5, 4, 3, 2, -1}), 256, -33554433, 268435455, -97, 1610612734, 1610612733, 2147483642, -16)); __m128i test_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi32 @@ -599,6 +663,7 @@ __m128i test_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdv_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shrdv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, 0xD, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, -9, -9, 2147483642)); __m128i test_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi32 @@ -606,12 +671,14 @@ __m128i test_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdv_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdv_epi32(0xD, (__m128i)(__v4si){ -8, -9, -10, -11}, (__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, 0, -9, 2147483642)); __m128i test_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi32 // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_shrdv_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_shrdv_epi32((__m128i)(__v4si){ -8, -9, -10, -11}, 
(__m128i)(__v4si){ 1, -2, -3, 4}, (__m128i)(__v4si){ -4, -3, -2, 1}), 31, -9, -9, 2147483642)); __m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdv_epi16 @@ -619,6 +686,7 @@ __m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, 0x12D6, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -3, 12, 35, -16387, 37, -385, 1151, -40, -1280, -42, 43, 223, 45, -46, 47)); __m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdv_epi16 @@ -626,12 +694,14 @@ __m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdv_epi16(0x12D6, (__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, -40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 0, -3, 12, 0, -16387, 0, -385, 1151, 0, -1280, 0, 0, 223, 0, 0, 0)); __m256i test_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdv_epi16 // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_shrdv_epi16((__m256i)(__v16hi){ 32, -33, 34, 35, -36, 37, -38, -39, 
-40, 41, -42, 43, -44, 45, -46, 47}, (__m256i)(__v16hi){ -1, -2, 3, -4, -5, -6, -7, 8, -9, -10, -11, -12, 13, 14, 15, -16}, (__m256i)(__v16hi){ 16, 15, 14, -13, -12, -11, 10, 9, -8, -7, 6, -5, -4, 3, -2, 1}), 32, -3, 12, -32764, -16387, -12287, -385, 1151, -2049, -1280, -10241, -384, 223, -16379, 63, 23)); __m128i test_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdv_epi16 @@ -639,6 +709,7 @@ __m128i test_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdv_epi16(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, 0x3A, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), -16, 256, -18, -6145, -20479, -16382, 22, -23)); __m128i test_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdv_epi16 @@ -646,10 +717,12 @@ __m128i test_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdv_epi16(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdv_epi16(0x3A, (__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 0, 256, 0, -6145, -20479, -16382, 0, 0)); __m128i test_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdv_epi16 // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_shrdv_epi16(__S, __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_shrdv_epi16((__m128i)(__v8hi){ -16, 17, -18, -19, 20, 21, 22, -23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, 6, -7, -8}, (__m128i)(__v8hi){ 8, -7, -6, 5, 4, 3, 2, -1}), 511, 256, 255, -6145, -20479, 
-16382, 16389, -15)); diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c index 3de4cca1a7e2..f63b5c6e7391 100644 --- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c @@ -7,41 +7,41 @@ __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B); } __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, 
<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B); } __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } @@ -87,41 +87,41 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusd_epi32(__S, __U, __A, __B); } __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // 
CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusds_epi32(__S, __U, __A, __B); } __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c index a0177b3ba0a2..afe80458e37c 
100644 --- a/clang/test/CodeGen/X86/avx512vnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c @@ -7,41 +7,41 @@ __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B); } __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B); } __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpbusd_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusd_epi32(__S, __A, __B); } __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return 
_mm512_mask_dpbusds_epi32(__S, __U, __A, __B); } __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B); } __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpbusds_epi32 - // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusds_epi32(__S, __A, __B); } diff --git a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c index b80ffdf0dcc7..4fcc34e3f3b5 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c @@ -3,6 +3,12 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c index 7258034c57ad..8e36b35ff857 100644 --- a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c @@ -3,6 +3,12 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// 
RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c index bb28a359424c..7948e0d57d9b 100644 --- a/clang/test/CodeGen/X86/avxvnni-builtins.c +++ b/clang/test/CodeGen/X86/avxvnni-builtins.c @@ -7,13 +7,13 @@ __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } @@ -31,13 +31,13 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } @@ -55,13 +55,13 @@ __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_avx_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_avx_epi32(__S, __A, __B); } __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_avx_epi32 - // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_avx_epi32(__S, __A, __B); } @@ -79,13 +79,13 @@ __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_avx_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_avx_epi32(__S, __A, __B); } __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_avx_epi32 - // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_avx_epi32(__S, __A, __B); } diff --git 
a/clang/test/CodeGen/X86/avxvnniint16-builtins.c b/clang/test/CodeGen/X86/avxvnniint16-builtins.c index c25367bdd850..941da9aa223b 100644 --- a/clang/test/CodeGen/X86/avxvnniint16-builtins.c +++ b/clang/test/CodeGen/X86/avxvnniint16-builtins.c @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -x c %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avxvnniint16 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avxvnniint16 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avxvnniint16 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avxvnniint16 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx10.2 -emit-llvm -o - -Wall 
-Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avxvnniint8-builtins.c b/clang/test/CodeGen/X86/avxvnniint8-builtins.c index f808dee40519..dd4a4483abaa 100644 --- a/clang/test/CodeGen/X86/avxvnniint8-builtins.c +++ b/clang/test/CodeGen/X86/avxvnniint8-builtins.c @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64- -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386- -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64- -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386- -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64- -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386- -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64- -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386- -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/bitscan-builtins.c 
b/clang/test/CodeGen/X86/bitscan-builtins.c index 9fd466641785..06c69945d2e5 100644 --- a/clang/test/CodeGen/X86/bitscan-builtins.c +++ b/clang/test/CodeGen/X86/bitscan-builtins.c @@ -1,11 +1,16 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s + // PR33722 // RUN: %clang_cc1 -x c -ffreestanding %s -triple x86_64-unknown-unknown -fms-extensions -fms-compatibility-version=19.00 -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple x86_64-unknown-unknown -fms-extensions -fms-compatibility-version=19.00 -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s + #include <x86intrin.h> +#include "builtin_test_helpers.h" int test_bit_scan_forward(int a) { // CHECK-LABEL: test_bit_scan_forward @@ -13,6 +18,8 @@ int test_bit_scan_forward(int a) { // CHECK: ret i32 %[[call]] return _bit_scan_forward(a); } +TEST_CONSTEXPR(_bit_scan_forward(0x00000001) == 0); +TEST_CONSTEXPR(_bit_scan_forward(0x10000000) == 28); int test_bit_scan_reverse(int a) { // CHECK-LABEL: test_bit_scan_reverse @@ -21,18 +28,24 @@ int test_bit_scan_reverse(int a) { // CHECK: ret i32 %[[sub]] return _bit_scan_reverse(a); } +TEST_CONSTEXPR(_bit_scan_reverse(0x00000001) == 0); +TEST_CONSTEXPR(_bit_scan_reverse(0x01000000) == 24); int test__bsfd(int X) { // CHECK-LABEL: test__bsfd // CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true) return __bsfd(X); } 
+TEST_CONSTEXPR(__bsfd(0x00000008) == 3); +TEST_CONSTEXPR(__bsfd(0x00010008) == 3); int test__bsfq(long long X) { // CHECK-LABEL: test__bsfq // CHECK: %[[call:.*]] = call i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true) return __bsfq(X); } +TEST_CONSTEXPR(__bsfq(0x0000000800000000ULL) == 35); +TEST_CONSTEXPR(__bsfq(0x0004000000000000ULL) == 50); int test__bsrd(int X) { // CHECK-LABEL: test__bsrd @@ -40,6 +53,8 @@ int test__bsrd(int X) { // CHECK: %[[sub:.*]] = sub nsw i32 31, %[[call]] return __bsrd(X); } +TEST_CONSTEXPR(__bsrd(0x00000010) == 4); +TEST_CONSTEXPR(__bsrd(0x00100100) == 20); int test__bsrq(long long X) { // CHECK-LABEL: test__bsrq @@ -48,26 +63,5 @@ int test__bsrq(long long X) { // CHECK: %[[sub:.*]] = sub nsw i32 63, %[[cast]] return __bsrq(X); } - -// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) - -char bsf_0[_bit_scan_forward(0x00000001) == 0 ? 1 : -1]; -char bsf_1[_bit_scan_forward(0x10000000) == 28 ? 1 : -1]; - -char bsr_0[_bit_scan_reverse(0x00000001) == 0 ? 1 : -1]; -char bsr_1[_bit_scan_reverse(0x01000000) == 24 ? 1 : -1]; - -char bsfd_0[__bsfd(0x00000008) == 3 ? 1 : -1]; -char bsfd_1[__bsfd(0x00010008) == 3 ? 1 : -1]; - -char bsrd_0[__bsrd(0x00000010) == 4 ? 1 : -1]; -char bsrd_1[__bsrd(0x00100100) == 20 ? 1 : -1]; - -char bsfq_0[__bsfq(0x0000000800000000ULL) == 35 ? 1 : -1]; -char bsfq_1[__bsfq(0x0004000000000000ULL) == 50 ? 1 : -1]; - -char bsrq_0[__bsrq(0x0000100800000000ULL) == 44 ? 1 : -1]; -char bsrq_1[__bsrq(0x0004000100000000ULL) == 50 ? 1 : -1]; - -#endif +TEST_CONSTEXPR(__bsrq(0x0000100800000000ULL) == 44); +TEST_CONSTEXPR(__bsrq(0x0004000100000000ULL) == 50); diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index 6541ca4fd922..fcaf360626a2 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -13,11 +13,21 @@ constexpr bool match_v1di(__m64 v, long long a) { return v[0] == a;
}
+constexpr bool match_v1du(__m64 _v, unsigned long long a) {
+ __v1du v = (__v1du)_v;
+ return v[0] == a;
+}
+
constexpr bool match_v2si(__m64 _v, int a, int b) {
__v2si v = (__v2si)_v;
return v[0] == a && v[1] == b;
}
+constexpr bool match_v2su(__m64 _v, unsigned a, unsigned b) {
+ __v2su v = (__v2su)_v;
+ return v[0] == a && v[1] == b;
+}
+
constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) {
__v4hi v = (__v4hi)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
@@ -48,11 +58,13 @@ constexpr bool match_m128d(__m128d _v, double a, double b) { return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == __builtin_bit_cast(unsigned long long, b);
}
+#ifdef __SSE2__
constexpr bool match_m128h(__m128h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07) {
__v8hu v = (__v8hu)_v;
return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == __builtin_bit_cast(unsigned short, __e01) && v[ 2] == __builtin_bit_cast(unsigned short, __e02) && v[ 3] == __builtin_bit_cast(unsigned short, __e03) &&
v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == __builtin_bit_cast(unsigned short, __e05) && v[ 6] == __builtin_bit_cast(unsigned short, __e06) && v[ 7] == __builtin_bit_cast(unsigned short, __e07);
}
+#endif
constexpr bool match_m128i(__m128i _v, unsigned long long a, unsigned long long b) {
__v2du v = (__v2du)_v;
@@ -63,11 +75,21 @@ constexpr bool match_v2di(__m128i v, long long a, long long b) { return v[0] == a && v[1] == b;
}
+constexpr bool match_v2du(__m128i _v, unsigned long long a, unsigned long long b) {
+ __v2du v = (__v2du)_v;
+ return v[0] == a && v[1] == b;
+}
+
constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
__v4si v = (__v4si)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}
+constexpr bool match_v4su(__m128i _v, unsigned a, unsigned b, unsigned c, unsigned d) {
+ __v4su v = (__v4su)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) {
__v8hi v = (__v8hi)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
@@ -99,6 +121,7 @@ constexpr bool match_m256d(__m256d _v, double a, double b, double c, double d) { return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == __builtin_bit_cast(unsigned long long, b) && v[2] == __builtin_bit_cast(unsigned long long, c) && v[3] == __builtin_bit_cast(unsigned long long, d);
}
+#ifdef __SSE2__
constexpr bool match_m256h(__m256h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07,
_Float16 __e08, _Float16 __e09, _Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, _Float16 __e15) {
__v16hu v = (__v16hu)_v;
@@ -107,6 +130,7 @@ constexpr bool match_m256h(__m256h _v, _Float16 __e00, _Float16 __e01, _Float16 v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == __builtin_bit_cast(unsigned short, __e09) && v[10] == __builtin_bit_cast(unsigned short, __e10) && v[11] == __builtin_bit_cast(unsigned short, __e11) &&
v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == __builtin_bit_cast(unsigned short, __e13) && v[14] == __builtin_bit_cast(unsigned short, __e14) && v[15] == __builtin_bit_cast(unsigned short, __e15);
}
+#endif
constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d) {
__v4du v = (__v4du)_v;
@@ -123,6 +147,11 @@ constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f, return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}
+constexpr bool match_v8su(__m256i _v, unsigned a, unsigned b, unsigned c, unsigned d, unsigned e, unsigned f, unsigned g, unsigned h) {
+ __v8su v = (__v8su)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, short e, short f, short g, short h, short i, short j, short k, short l, short m, short n, short o, short p) {
__v16hi v = (__v16hi)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
@@ -169,6 +198,7 @@ constexpr bool match_m512d(__m512d _v, double __e00, double __e01, double __e02, v[ 4] == __builtin_bit_cast(unsigned long long, __e04) && v[ 5] == __builtin_bit_cast(unsigned long long, __e05) && v[ 6] == __builtin_bit_cast(unsigned long long, __e06) && v[ 7] == __builtin_bit_cast(unsigned long long, __e07);
}
+#ifdef __SSE2__
constexpr bool match_m512h(__m512h _v, _Float16 __e00, _Float16 __e01, _Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, _Float16 __e07,
_Float16 __e08, _Float16 __e09, _Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, _Float16 __e15,
_Float16 __e16, _Float16 __e17, _Float16 __e18, _Float16 __e19, _Float16 __e20, _Float16 __e21, _Float16 __e22, _Float16 __e23,
@@ -183,6 +213,7 @@ constexpr bool match_m512h(__m512h _v, _Float16 __e00, _Float16 __e01, _Float16 v[24] == __builtin_bit_cast(unsigned short, __e24) && v[25] == __builtin_bit_cast(unsigned short, __e25) && v[26] == __builtin_bit_cast(unsigned short, __e26) && v[27] == __builtin_bit_cast(unsigned short, __e27) &&
v[28] == __builtin_bit_cast(unsigned short, __e28) && v[29] == __builtin_bit_cast(unsigned short, __e29) && v[30] == __builtin_bit_cast(unsigned short, __e30) && v[31] == __builtin_bit_cast(unsigned short, __e31);
}
+#endif
constexpr bool match_m512i(__m512i _v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d, unsigned long long e, unsigned long long f, unsigned long long g, unsigned long long h) {
__v8du v = (__v8du)_v;
@@ -199,6 +230,11 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f, return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
}
+constexpr bool match_v16su(__m512i _v, unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o, unsigned int p) {
+ __v16su v = (__v16su)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07,
short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15,
short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23,
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index 1bee8364f2ef..6a696273cb3c 100644 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/fma-builtins.c b/clang/test/CodeGen/X86/fma-builtins.c index 8e9822ec6ad2..5445e50d4ece 100644 --- a/clang/test/CodeGen/X86/fma-builtins.c +++ b/clang/test/CodeGen/X86/fma-builtins.c @@ -1,7 +1,12 @@ -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror | FileCheck %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/fma4-builtins.c b/clang/test/CodeGen/X86/fma4-builtins.c index dcfd48a220e1..fb449d5da259 100644 --- a/clang/test/CodeGen/X86/fma4-builtins.c +++ b/clang/test/CodeGen/X86/fma4-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin 
-target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <x86intrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/lzcnt-builtins.c b/clang/test/CodeGen/X86/lzcnt-builtins.c index 212155f123ad..eb02c11e14ee 100644 --- a/clang/test/CodeGen/X86/lzcnt-builtins.c +++ b/clang/test/CodeGen/X86/lzcnt-builtins.c @@ -1,59 +1,54 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> +#include "builtin_test_helpers.h" unsigned short test__lzcnt16(unsigned short __X) { // 
CHECK: @llvm.ctlz.i16(i16 %{{.*}}, i1 false) return __lzcnt16(__X); } +TEST_CONSTEXPR(__lzcnt16(0x0000) == 16); +TEST_CONSTEXPR(__lzcnt16(0x8000) == 0); +TEST_CONSTEXPR(__lzcnt16(0x0010) == 11); unsigned int test_lzcnt32(unsigned int __X) { // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false) return __lzcnt32(__X); } +TEST_CONSTEXPR(__lzcnt32(0x00000000) == 32); +TEST_CONSTEXPR(__lzcnt32(0x80000000) == 0); +TEST_CONSTEXPR(__lzcnt32(0x00000010) == 27); unsigned long long test__lzcnt64(unsigned long long __X) { // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false) return __lzcnt64(__X); } +TEST_CONSTEXPR(__lzcnt64(0x0000000000000000ULL) == 64); +TEST_CONSTEXPR(__lzcnt64(0x8000000000000000ULL) == 0); +TEST_CONSTEXPR(__lzcnt64(0x0000000100000000ULL) == 31); unsigned int test_lzcnt_u32(unsigned int __X) { // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false) return _lzcnt_u32(__X); } +TEST_CONSTEXPR(_lzcnt_u32(0x00000000) == 32); +TEST_CONSTEXPR(_lzcnt_u32(0x80000000) == 0); +TEST_CONSTEXPR(_lzcnt_u32(0x00000010) == 27); unsigned long long test__lzcnt_u64(unsigned long long __X) { // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false) return _lzcnt_u64(__X); } - - -// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) -char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1]; -char lzcnt16_1[__lzcnt16(0x8000) == 0 ? 1 : -1]; -char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1]; - -char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1]; -char lzcnt32_1[__lzcnt32(0x80000000) == 0 ? 1 : -1]; -char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1]; - -char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1]; -char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) == 0 ? 1 : -1]; -char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1]; - -char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1]; -char lzcntu32_1[_lzcnt_u32(0x80000000) == 0 ? 1 : -1]; -char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1]; - -char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 
1 : -1]; -char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) == 0 ? 1 : -1]; -char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1]; -#endif +TEST_CONSTEXPR(_lzcnt_u64(0x0000000000000000ULL) == 64); +TEST_CONSTEXPR(_lzcnt_u64(0x8000000000000000ULL) == 0); +TEST_CONSTEXPR(_lzcnt_u64(0x0000000100000000ULL) == 31); diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 35f0d6c9b43e..43d9ec5e6cc8 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -7,6 +7,15 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -16,18 +25,21 @@ __m64 test_mm_abs_pi8(__m64 a) { // CHECK: call <8 x i8> @llvm.abs.v8i8( return _mm_abs_pi8(a); } +TEST_CONSTEXPR(match_v8qi(_mm_abs_pi8((__m64)(__v8qs){-3, +2, -1, 0, +1, -2, +3, -4}), 3, 2, 1, 0, 1, 2, 3, 4)); __m64 test_mm_abs_pi16(__m64 a) { // CHECK-LABEL: test_mm_abs_pi16 // CHECK: call <4 x i16> @llvm.abs.v4i16( return _mm_abs_pi16(a); } +TEST_CONSTEXPR(match_v4hi(_mm_abs_pi16((__m64)(__v4hi){+1, -2, +3, -4}), 1, 2, 3, 4)); __m64 test_mm_abs_pi32(__m64 a) { // CHECK-LABEL: test_mm_abs_pi32 // CHECK: call <2 x i32> @llvm.abs.v2i32( return _mm_abs_pi32(a); } +TEST_CONSTEXPR(match_v2si(_mm_abs_pi32((__m64)(__v2si){+5, -3}), 5, 3)); __m64 test_mm_add_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_add_pi8 @@ -111,12 +123,14 @@ __m64 test_mm_avg_pu8(__m64 a, __m64 b) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b( return _mm_avg_pu8(a, b); } +TEST_CONSTEXPR(match_v8qu(_mm_avg_pu8((__m64)(__v8qu){0, 1, 2, 3, 18, 15, 12, 20}, (__m64)(__v8qu){0, 
1, 2, 3, 16, 3, 20, 10}), 0, 1, 2, 3, 17, 9, 16, 15)); __m64 test_mm_avg_pu16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_avg_pu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w( return _mm_avg_pu16(a, b); } +TEST_CONSTEXPR(match_v4hu(_mm_avg_pu16((__m64)(__v4hu){18, 15, 12, 20}, (__m64)(__v4hu){16, 3, 20, 10}), 17, 9, 16, 15)); __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_cmpeq_pi8 @@ -358,6 +372,8 @@ __m64 test_mm_max_pi16(__m64 a, __m64 b) { return _mm_max_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_max_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), 1, 2, 3, 4)); + __m64 test_mm_max_pu8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_max_pu8 // CHECK: call <8 x i8> @llvm.umax.v8i8( @@ -370,6 +386,8 @@ __m64 test_mm_min_pi16(__m64 a, __m64 b) { return _mm_min_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_min_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), -1, -2, -3, -4)); + __m64 test_mm_min_pu8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_min_pu8 // CHECK: call <8 x i8> @llvm.umin.v8i8( @@ -605,18 +623,35 @@ __m64 test_mm_slli_pi16(__m64 a) { // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w( return _mm_slli_pi16(a, 3); } +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 0x2, 0x4, 0x6)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 15), 0, 0x8000, 0x0, 0x8000)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_slli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0)); __m64 test_mm_slli_pi32(__m64 a) { // CHECK-LABEL: test_mm_slli_pi32 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d( return _mm_slli_pi32(a, 3); } +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 0), 0, 1)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){0, 1}, 1), 0, 0x2)); 
+TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 2}, 2), 0x4, 0x8)); +TEST_CONSTEXPR(match_v2su(_mm_slli_pi32((__m64)(__v2su){1, 1}, 31), 0x80000000, 0x80000000)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 32), 0, 0)); +TEST_CONSTEXPR(match_v2si(_mm_slli_pi32((__m64)(__v2si){1, 1}, 33), 0, 0)); __m64 test_mm_slli_si64(__m64 a) { // CHECK-LABEL: test_mm_slli_si64 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q( return _mm_slli_si64(a, 3); } +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){0}, 0), 0)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 1), 0x2)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){2}, 2), 0x8)); +TEST_CONSTEXPR(match_v1du(_mm_slli_si64((__m64)(__v1du){1}, 63), 0x8000000000000000ULL)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 64), 0)); +TEST_CONSTEXPR(match_v1di(_mm_slli_si64((__m64)(__v1di){1}, 65), 0)); __m64 test_mm_sra_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sra_pi16 @@ -635,12 +670,25 @@ __m64 test_mm_srai_pi16(__m64 a) { // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w( return _mm_srai_pi16(a, 3); } +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 1), -1, 0, 1, 1)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, -1, 1, 2}, 15), -1, -1, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, -1, 2, 3}, 16), -1, -1, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 16), -1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 17), -1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, -42, -32768}, 100), -1, 0, -1, -1)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-1, 1, 2, 3}, 200), -1, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 1), -16384, 16383, -1, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 15), -1, 0, -1, 
0)); +TEST_CONSTEXPR(match_v4hi(_mm_srai_pi16((__m64)(__v4hi){-32768, 32767, -2, 0}, 30), -1, 0, -1, 0)); __m64 test_mm_srai_pi32(__m64 a) { // CHECK-LABEL: test_mm_srai_pi32 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d( return _mm_srai_pi32(a, 3); } +TEST_CONSTEXPR(match_v2si(_mm_srai_pi32((__m64)(__v2si){-32768, 32767}, 30), -1, 0)); +TEST_CONSTEXPR(match_v2si(_mm_srai_pi32((__m64)(__v2si){-2, 20}, 1), -1, 0xa)); +TEST_CONSTEXPR(match_v2si(_mm_srai_pi32((__m64)(__v2si){-1, 20}, 1), -1, 0xa)); __m64 test_mm_srl_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_srl_pi16 @@ -665,18 +713,25 @@ __m64 test_mm_srli_pi16(__m64 a) { // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w( return _mm_srli_pi16(a, 3); } +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 0x0, 0x1, 0x1)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 1), 0, 0x0, 0x1, 0x1)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){-1, 0, 0, 0}, 1), 0x7fff, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 16), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_srli_pi16((__m64)(__v4hi){0, 1, 2, 3}, 17), 0, 0, 0, 0)); __m64 test_mm_srli_pi32(__m64 a) { // CHECK-LABEL: test_mm_srli_pi32 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d( return _mm_srli_pi32(a, 3); } +TEST_CONSTEXPR(match_v2si(_mm_srli_pi32((__m64)(__v2si){1, 1025}, 2), 0x0, 0x100)); __m64 test_mm_srli_si64(__m64 a) { // CHECK-LABEL: test_mm_srli_si64 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q( return _mm_srli_si64(a, 3); } +TEST_CONSTEXPR(match_v1di(_mm_srli_si64((__m64)(__v1di){1025}, 2), 0x100)); void test_mm_stream_pi(__m64 *p, __m64 a) { // CHECK-LABEL: test_mm_stream_pi diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c index a4d887f0be41..8d54eb1da08d 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c +++ 
b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -verify +// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2 -verify #include <immintrin.h> __m512i test_mm512_loadrs_epi8(const __m512i * __A) { diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c index 997d6dbc53a8..4b75c6049f2f 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c +++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c index 2a7204e39b83..c11e28b2d385 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c +++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -verify +// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2 -verify #include <immintrin.h> __m128i test_mm_loadrs_epi8(const __m128i * __A) { diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c index 2011b2a86247..05524ab1f942 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c +++ 
b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/popcnt-builtins.c b/clang/test/CodeGen/X86/popcnt-builtins.c index b27bc3f0597f..fdd1a4c0e5d9 100644 --- a/clang/test/CodeGen/X86/popcnt-builtins.c +++ b/clang/test/CodeGen/X86/popcnt-builtins.c @@ -3,24 +3,37 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,CHECK-POPCNT +// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,CHECK-POPCNT +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s + + #include <x86intrin.h> +#include "builtin_test_helpers.h" #ifdef __POPCNT__ int test_mm_popcnt_u32(unsigned int __X) { //CHECK-POPCNT: call i32 @llvm.ctpop.i32 return _mm_popcnt_u32(__X); } +TEST_CONSTEXPR(_mm_popcnt_u32(0x00000000) == 0); 
+TEST_CONSTEXPR(_mm_popcnt_u32(0x000000F0) == 4); #endif int test_popcnt32(unsigned int __X) { //CHECK: call i32 @llvm.ctpop.i32 return _popcnt32(__X); } +TEST_CONSTEXPR(_popcnt32(0x00000000) == 0); +TEST_CONSTEXPR(_popcnt32(0x100000F0) == 5); int test__popcntd(unsigned int __X) { //CHECK: call i32 @llvm.ctpop.i32 return __popcntd(__X); } +TEST_CONSTEXPR(__popcntd(0x00000000) == 0); +TEST_CONSTEXPR(__popcntd(0x00F000F0) == 8); #ifdef __x86_64__ #ifdef __POPCNT__ @@ -28,42 +41,21 @@ long long test_mm_popcnt_u64(unsigned long long __X) { //CHECK-POPCNT: call i64 @llvm.ctpop.i64 return _mm_popcnt_u64(__X); } +TEST_CONSTEXPR(_mm_popcnt_u64(0x0000000000000000ULL) == 0); +TEST_CONSTEXPR(_mm_popcnt_u64(0xF000000000000001ULL) == 5); #endif long long test_popcnt64(unsigned long long __X) { //CHECK: call i64 @llvm.ctpop.i64 return _popcnt64(__X); } +TEST_CONSTEXPR(_popcnt64(0x0000000000000000ULL) == 0); +TEST_CONSTEXPR(_popcnt64(0xF00000F000000001ULL) == 9); long long test__popcntq(unsigned long long __X) { //CHECK: call i64 @llvm.ctpop.i64 return __popcntq(__X); } -#endif - -// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#if defined(__POPCNT__) -char ctpop32_0[_mm_popcnt_u32(0x00000000) == 0 ? 1 : -1]; -char ctpop32_1[_mm_popcnt_u32(0x000000F0) == 4 ? 1 : -1]; -#endif - -char popcnt32_0[_popcnt32(0x00000000) == 0 ? 1 : -1]; -char popcnt32_1[_popcnt32(0x100000F0) == 5 ? 1 : -1]; - -char popcntd_0[__popcntd(0x00000000) == 0 ? 1 : -1]; -char popcntd_1[__popcntd(0x00F000F0) == 8 ? 1 : -1]; - -#ifdef __x86_64__ -#if defined(__POPCNT__) -char ctpop64_0[_mm_popcnt_u64(0x0000000000000000ULL) == 0 ? 1 : -1]; -char ctpop64_1[_mm_popcnt_u64(0xF000000000000001ULL) == 5 ? 1 : -1]; -#endif - -char popcnt64_0[_popcnt64(0x0000000000000000ULL) == 0 ? 1 : -1]; -char popcnt64_1[_popcnt64(0xF00000F000000001ULL) == 9 ? 1 : -1]; - -char popcntq_0[__popcntq(0x0000000000000000ULL) == 0 ? 1 : -1]; -char popcntq_1[__popcntq(0xF000010000300001ULL) == 8 ? 
1 : -1]; -#endif +TEST_CONSTEXPR(__popcntq(0x0000000000000000ULL) == 0); +TEST_CONSTEXPR(__popcntq(0xF000010000300001ULL) == 8); #endif diff --git a/clang/test/CodeGen/X86/rot-intrinsics.c b/clang/test/CodeGen/X86/rot-intrinsics.c index 5da300b024b5..338b06fbf2d6 100644 --- a/clang/test/CodeGen/X86/rot-intrinsics.c +++ b/clang/test/CodeGen/X86/rot-intrinsics.c @@ -5,14 +5,22 @@ // RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG // RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding -triple i686--linux -no-enable-noundef-analysis -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding -triple x86_64--linux -no-enable-noundef-analysis -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - 
-Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -ffreestanding -triple i686--linux -no-enable-noundef-analysis -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64--linux -no-enable-noundef-analysis -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG + +// RUN: %clang_cc1 -x c++ -ffreestanding -triple i686--linux -no-enable-noundef-analysis -emit-llvm %s -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64--linux 
-no-enable-noundef-analysis -emit-llvm %s -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -no-enable-noundef-analysis -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG #include <x86intrin.h> +#include "builtin_test_helpers.h" unsigned char test__rolb(unsigned char value, int shift) { // CHECK-LABEL: test__rolb @@ -20,6 +28,7 @@ unsigned char test__rolb(unsigned char value, int shift) { // CHECK: ret i8 [[R]] return __rolb(value, shift); } +TEST_CONSTEXPR(__rolb(0x01, 5) == 0x20); unsigned short test__rolw(unsigned short value, int shift) { // CHECK-LABEL: test__rolw @@ -27,6 +36,7 @@ unsigned short test__rolw(unsigned short value, int shift) { // CHECK: ret i16 [[R]] return __rolw(value, shift); } +TEST_CONSTEXPR(__rolw(0x3210, 11) == 0x8190); unsigned int test__rold(unsigned int value, int shift) { // CHECK-LABEL: 
test__rold @@ -34,6 +44,7 @@ unsigned int test__rold(unsigned int value, int shift) { // CHECK: ret i32 [[R]] return __rold(value, shift); } +TEST_CONSTEXPR(__rold(0x76543210, 22) == 0x841D950C); #if defined(__x86_64__) unsigned long test__rolq(unsigned long value, int shift) { @@ -42,6 +53,7 @@ unsigned long test__rolq(unsigned long value, int shift) { // CHECK-LONG: ret i64 [[R]] return __rolq(value, shift); } +TEST_CONSTEXPR(__rolq(0xFEDCBA9876543210ULL, 55) == 0x087F6E5D4C3B2A19ULL); #endif unsigned char test__rorb(unsigned char value, int shift) { @@ -50,6 +62,7 @@ unsigned char test__rorb(unsigned char value, int shift) { // CHECK: ret i8 [[R]] return __rorb(value, shift); } +TEST_CONSTEXPR(__rorb(0x01, 5) == 0x08); unsigned short test__rorw(unsigned short value, int shift) { // CHECK-LABEL: test__rorw @@ -57,6 +70,7 @@ unsigned short test__rorw(unsigned short value, int shift) { // CHECK: ret i16 [[R]] return __rorw(value, shift); } +TEST_CONSTEXPR(__rorw(0x3210, 11) == 0x4206); unsigned int test__rord(unsigned int value, int shift) { // CHECK-LABEL: test__rord @@ -64,6 +78,7 @@ unsigned int test__rord(unsigned int value, int shift) { // CHECK: ret i32 [[R]] return __rord(value, shift); } +TEST_CONSTEXPR(__rord(0x76543210, 22) == 0x50C841D9); #if defined(__x86_64__) unsigned long test__rorq(unsigned long value, int shift) { @@ -72,6 +87,7 @@ unsigned long test__rorq(unsigned long value, int shift) { // CHECK-LONG: ret i64 [[R]] return __rorq(value, shift); } +TEST_CONSTEXPR(__rorq(0xFEDCBA9876543210ULL, 55) == 0xB97530ECA86421FDULL); #endif unsigned short test_rotwl(unsigned short value, int shift) { @@ -80,6 +96,7 @@ unsigned short test_rotwl(unsigned short value, int shift) { // CHECK: ret i16 [[R]] return _rotwl(value, shift); } +TEST_CONSTEXPR(_rotwl(0x3210, 4) == 0x2103); unsigned int test_rotl(unsigned int value, int shift) { // CHECK-LABEL: test_rotl @@ -87,6 +104,7 @@ unsigned int test_rotl(unsigned int value, int shift) { // CHECK: ret i32 [[R]] 
return _rotl(value, shift); } +TEST_CONSTEXPR(_rotl(0x76543210, 8) == 0x54321076); unsigned long test_lrotl(unsigned long value, int shift) { // CHECK-32BIT-LONG-LABEL: test_lrotl @@ -98,6 +116,11 @@ unsigned long test_lrotl(unsigned long value, int shift) { // CHECK-64BIT-LONG: ret i64 [[R]] return _lrotl(value, shift); } +#if defined(__LP64__) && !defined(_MSC_VER) +TEST_CONSTEXPR(_lrotl(0xFEDCBA9876543210ULL, 55) == 0x087F6E5D4C3B2A19ULL); +#else +TEST_CONSTEXPR(_lrotl(0x76543210, 22) == 0x841D950C); +#endif unsigned short test_rotwr(unsigned short value, int shift) { @@ -106,6 +129,7 @@ unsigned short test_rotwr(unsigned short value, int shift) { // CHECK: ret i16 [[R]] return _rotwr(value, shift); } +TEST_CONSTEXPR(_rotwr(0x3210, 4) == 0x0321); unsigned int test_rotr(unsigned int value, int shift) { // CHECK-LABEL: test_rotr @@ -113,6 +137,7 @@ unsigned int test_rotr(unsigned int value, int shift) { // CHECK: ret i32 [[R]] return _rotr(value, shift); } +TEST_CONSTEXPR(_rotr(0x76543210, 8) == 0x10765432); unsigned long test_lrotr(unsigned long value, int shift) { // CHECK-32BIT-LONG-LABEL: test_lrotr @@ -124,34 +149,9 @@ unsigned long test_lrotr(unsigned long value, int shift) { // CHECK-64BIT-LONG: ret i64 [[R]] return _lrotr(value, shift); } - -// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) - -char rolb_0[__rolb(0x01, 5) == 0x20 ? 1 : -1]; -char rolw_0[__rolw(0x3210, 11) == 0x8190 ? 1 : -1]; -char rold_0[__rold(0x76543210, 22) == 0x841D950C ? 1 : -1]; - -char rorb_0[__rorb(0x01, 5) == 0x08 ? 1 : -1]; -char rorw_0[__rorw(0x3210, 11) == 0x4206 ? 1 : -1]; -char rord_0[__rord(0x76543210, 22) == 0x50C841D9 ? 1 : -1]; - -#if defined(__x86_64__) -char rolq_0[__rolq(0xFEDCBA9876543210ULL, 55) == 0x087F6E5D4C3B2A19ULL ? 1 : -1]; -char rorq_0[__rorq(0xFEDCBA9876543210ULL, 55) == 0xB97530ECA86421FDULL ? 1 : -1]; -#endif - -char rotwl_0[_rotwl(0x3210, 4) == 0x2103 ? 1 : -1]; -char rotwr_0[_rotwr(0x3210, 4) == 0x0321 ? 
1 : -1]; -char rotl_0[_rotl(0x76543210, 8) == 0x54321076 ? 1 : -1]; -char rotr_0[_rotr(0x76543210, 8) == 0x10765432 ? 1 : -1]; - #if defined(__LP64__) && !defined(_MSC_VER) -char lrotl_0[_lrotl(0xFEDCBA9876543210ULL, 55) == 0x087F6E5D4C3B2A19ULL ? 1 : -1]; -char lrotr_0[_lrotr(0xFEDCBA9876543210ULL, 55) == 0xB97530ECA86421FDULL ? 1 : -1]; +TEST_CONSTEXPR(_lrotr(0xFEDCBA9876543210ULL, 55) == 0xB97530ECA86421FDULL); #else -char lrotl_0[_lrotl(0x76543210, 22) == 0x841D950C ? 1 : -1]; -char lrotr_0[_lrotr(0x76543210, 22) == 0x50C841D9 ? 1 : -1]; +TEST_CONSTEXPR(_lrotr(0x76543210, 22) == 0x50C841D9); #endif -#endif diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c index 0e54bd008d4f..a819f252e513 100644 --- a/clang/test/CodeGen/X86/sm4-evex-builtins.c +++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \ -// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \ -// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: -target-feature +avx10.2 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> #include <stddef.h> diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 12d9abdfb582..3bad3426b158 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s 
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 38d5e877a503..0ba32bb230cd 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -9,6 +9,17 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature 
+sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s 
-triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -123,12 +134,14 @@ __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_avg_epu8(A, B); } +TEST_CONSTEXPR(match_v16qu(_mm_avg_epu8((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_avg_epu16(A, B); } +TEST_CONSTEXPR(match_v8hu(_mm_avg_epu16((__m128i)(__v8hu){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hu){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 5, 6, 7, 8)); __m128i test_mm_bslli_si128(__m128i A) { // CHECK-LABEL: test_mm_bslli_si128 @@ -243,18 +256,21 @@ __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) { // CHECK: icmp eq <16 x i8> return _mm_cmpeq_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8((__m128i)(__v16qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16}, (__m128i)(__v16qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1)); __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpeq_epi16 // CHECK: icmp eq <8 x i16> return _mm_cmpeq_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_cmpeq_epi16((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-10, -2, +6, -4, +5, -12, +14, -8}), 0, -1, 0, -1, -1, 0, 0, -1)); __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpeq_epi32 // CHECK: icmp eq <4 x i32> return _mm_cmpeq_epi32(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_cmpeq_epi32((__m128i)(__v4si){+1, -2, +3, -4}, 
(__m128i)(__v4si){-10, -2, +6, -4}), 0, -1, 0, -1)); __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpeq_pd @@ -293,18 +309,24 @@ __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) { // CHECK: icmp sgt <16 x i8> return _mm_cmpgt_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_cmpgt_epi8( + (__m128i)(__v16qs){15,-2,8,-4,12,6,-20,8,25,-10,30,12,-35,14,40,-16}, + (__m128i)(__v16qs){10,-2,6,-4,5,12,-14,8,9,-20,22,12,-26,14,30,-16}), + -1, 0, -1, 0, -1, 0, 0, 0,-1, -1, -1, 0, 0, 0, -1, 0)); __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpgt_epi16 // CHECK: icmp sgt <8 x i16> return _mm_cmpgt_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_cmpgt_epi16((__m128i)(__v8hi){15,2,8,4,12,6,20,8}, (__m128i)(__v8hi){10,2,6,4,5,12,14,8}), -1,0,-1,0,-1,0,-1,0)); __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpgt_epi32 // CHECK: icmp sgt <4 x i32> return _mm_cmpgt_epi32(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_cmpgt_epi32((__m128i)(__v4si){15,2,8,4}, (__m128i)(__v4si){10,2,6,4}), -1,0,-1,0)); __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmpgt_pd @@ -343,18 +365,24 @@ __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) { // CHECK: icmp sgt <16 x i8> return _mm_cmplt_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_cmplt_epi8( + (__m128i)(__v16qs){15,-2,8,-4,12,6,-20,8,25,-10,30,12,-35,14,40,-16}, + (__m128i)(__v16qs){10,-2,6,-4,5,12,-14,8,9,-20,22,12,-26,14,30,-16}), + 0, 0, 0, 0, 0, -1, -1, 0,0, 0, 0, 0, -1, 0, 0, 0)); __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmplt_epi16 // CHECK: icmp sgt <8 x i16> return _mm_cmplt_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_cmplt_epi16((__m128i)(__v8hi){5,2,3,4,1,6,7,8}, (__m128i)(__v8hi){10,2,6,4,5,12,14,8}), -1, 0, -1, 0, -1, -1, -1, 0)); __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmplt_epi32 // CHECK: icmp sgt <4 x i32> return _mm_cmplt_epi32(A, B); } 
+TEST_CONSTEXPR(match_v4si(_mm_cmplt_epi32((__m128i)(__v4si){5,2,3,4}, (__m128i)(__v4si){10,2,6,4}), -1,0,-1,0)); __m128d test_mm_cmplt_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmplt_pd @@ -829,12 +857,16 @@ __m128i test_mm_max_epi16(__m128i A, __m128i B) { return _mm_max_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_max_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 5, 8, 12, 20, 32)); + __m128i test_mm_max_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_max_epu8 // CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_max_epu8(A, B); } +TEST_CONSTEXPR(match_v16qu(_mm_max_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 9, 10, 11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16)); + __m128d test_mm_max_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_max_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) @@ -859,12 +891,16 @@ __m128i test_mm_min_epi16(__m128i A, __m128i B) { return _mm_min_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_min_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 4, 5, 6, 7, 8)); + __m128i test_mm_min_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_min_epu8 // CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_min_epu8(A, B); } +TEST_CONSTEXPR(match_v16qu(_mm_min_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); + __m128d test_mm_min_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_min_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) @@ -1296,6 +1332,10 @@ __m128i 
test_mm_slli_epi16(__m128i A) { // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) return _mm_slli_epi16(A, 1); } +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 0), 0, 1, 2, 3, 4, 5, 6, 7)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 8), 0, 0x800, 0x200, 0x300, 0x400, 0x500, 0x600, 0x700)); +TEST_CONSTEXPR(match_v8hi(_mm_slli_epi16((__m128i)(__v8hi){0, 8, 2, 3, 4, 5, 6, 7}, 16), 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_slli_epi16_1(__m128i A) { // CHECK-LABEL: test_mm_slli_epi16_1 @@ -1314,6 +1354,11 @@ __m128i test_mm_slli_epi32(__m128i A) { // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) return _mm_slli_epi32(A, 1); } +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 0), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 1), 0, 0x2, 0x4, 0x6)); +TEST_CONSTEXPR(match_v4su(_mm_slli_epi32((__m128i)(__v4su){0, 1, 2, 3}, 31), 0, 0x80000000, 0x0, 0x80000000)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 32), 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v4si(_mm_slli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 33), 0, 0, 0, 0)); __m128i test_mm_slli_epi32_1(__m128i A) { // CHECK-LABEL: test_mm_slli_epi32_1 @@ -1332,6 +1377,12 @@ __m128i test_mm_slli_epi64(__m128i A) { // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) return _mm_slli_epi64(A, 1); } +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 0), 0, 1)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 1), 0, 0x2)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){5, 8}, 6), 0x140, 0x200)); +TEST_CONSTEXPR(match_v2du(_mm_slli_epi64((__m128i)(__v2du){0, 1}, 63), 0, 0x8000000000000000ULL)); 
+TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 64), 0, 0)); +TEST_CONSTEXPR(match_v2di(_mm_slli_epi64((__m128i)(__v2di){0, 1}, 65), 0, 0)); __m128i test_mm_slli_epi64_1(__m128i A) { // CHECK-LABEL: test_mm_slli_epi64_1 @@ -1388,6 +1439,8 @@ __m128i test_mm_srai_epi16(__m128i A) { // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}}) return _mm_srai_epi16(A, 1); } +TEST_CONSTEXPR(match_v8hi(_mm_srai_epi16((__m128i)(__v8hi){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1)); +TEST_CONSTEXPR(match_v8hi(_mm_srai_epi16((__m128i)(__v8hi){-32768, 32767, -3, -2, -1, 0, 1, 2}, 1), -16384, 16383, -2, -1, -1, 0, 0, 1)); __m128i test_mm_srai_epi16_1(__m128i A) { // CHECK-LABEL: test_mm_srai_epi16_1 @@ -1406,6 +1459,7 @@ __m128i test_mm_srai_epi32(__m128i A) { // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}}) return _mm_srai_epi32(A, 1); } +TEST_CONSTEXPR(match_v4si(_mm_srai_epi32((__m128i)(__v4si){-32768, 32767, -3, 2}, 1), -16384, 16383, -2, 1)); __m128i test_mm_srai_epi32_1(__m128i A) { // CHECK-LABEL: test_mm_srai_epi32_1 @@ -1442,6 +1496,7 @@ __m128i test_mm_srli_epi16(__m128i A) { // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) return _mm_srli_epi16(A, 1); } +TEST_CONSTEXPR(match_v8hi(_mm_srli_epi16((__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3)); __m128i test_mm_srli_epi16_1(__m128i A) { // CHECK-LABEL: test_mm_srli_epi16_1 @@ -1460,6 +1515,7 @@ __m128i test_mm_srli_epi32(__m128i A) { // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) return _mm_srli_epi32(A, 1); } +TEST_CONSTEXPR(match_v4si(_mm_srli_epi32((__m128i)(__v4si){0, 1, 2, 3}, 8), 0, 0x0, 0x0, 0x0)); __m128i test_mm_srli_epi32_1(__m128i A) { // CHECK-LABEL: test_mm_srli_epi32_1 @@ -1478,6 +1534,7 @@ __m128i test_mm_srli_epi64(__m128i A) { // CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, 
i32 %{{.*}}) return _mm_srli_epi64(A, 1); } +TEST_CONSTEXPR(match_v2di(_mm_srli_epi64((__m128i)(__v2di){100005, 100008}, 6), 0x61a, 0x61a)); __m128i test_mm_srli_epi64_1(__m128i A) { // CHECK-LABEL: test_mm_srli_epi64_1 diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index d47c19b882cd..c53afc56e724 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 500b780d4905..dca161c8038a 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -7,6 +7,15 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none 
-ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -36,18 +45,21 @@ __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) { // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_blendv_epi8(V1, V2, V3); } +TEST_CONSTEXPR(match_v16qi(_mm_blendv_epi8((__m128i)(__v16qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},(__m128i)(__v16qs){-99,-98,97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84},(__m128i)(__v16qs){-1,-1,0,-1,0,0,0,0,0,-1,-1,-1,0,0,-1,0}), -99, -98, 2, -96, 4, 5, 6, 7, 8, -90, -89, -88, 12, 13, -85, 15)); __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) { // CHECK-LABEL: test_mm_blendv_pd // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_blendv_pd(V1, V2, V3); } +TEST_CONSTEXPR(match_m128d(_mm_blendv_pd((__m128d)(__v2df){2.0, -4.0},(__m128d)(__v2df){-111.0, +222.0},(__m128d)(__v2df){2.0, -2.0}), 2.0, 222.0)); __m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) { // CHECK-LABEL: test_mm_blendv_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_blendv_ps(V1, V2, V3); } +TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f},(__m128)(__v4sf){-100.0f, -101.0f, -102.0f, -103.0f},(__m128)(__v4sf){-1.0f, 2.0f, -3.0f, 0.0f}), -100.0f, 1.0f, -102.0f, 3.0f)); __m128d test_mm_ceil_pd(__m128d x) { // CHECK-LABEL: test_mm_ceil_pd @@ -79,6 +91,7 @@ __m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) { // CHECK: sext <2 x i1> %{{.*}} to <2 x i64> return 
_mm_cmpeq_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_cmpeq_epi64((__m128i)(__v2di){+1, -8}, (__m128i)(__v2di){-10, -8}), 0, -1)); __m128i test_mm_cvtepi8_epi16(__m128i a) { // CHECK-LABEL: test_mm_cvtepi8_epi16 @@ -281,48 +294,64 @@ __m128i test_mm_max_epi8(__m128i x, __m128i y) { return _mm_max_epi8(x, y); } +TEST_CONSTEXPR(match_v16qi(_mm_max_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16)); + __m128i test_mm_max_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi32 // CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_max_epi32(x, y); } +TEST_CONSTEXPR(match_v4si(_mm_max_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), +1, +2, +3, +4 )); + __m128i test_mm_max_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu16 // CHECK: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_max_epu16(x, y); } +TEST_CONSTEXPR(match_v8hu(_mm_max_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 3, 4, 5, 7, 9, 11, 13, 15)); + __m128i test_mm_max_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epu32 // CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_max_epu32(x, y); } +TEST_CONSTEXPR(match_v4su(_mm_max_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 3, 4, 5, 7)); + __m128i test_mm_min_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi8 // CHECK: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_min_epi8(x, y); } +TEST_CONSTEXPR(match_v16qi(_mm_min_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, 
-12, +13, -14, +15, -16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16)); + __m128i test_mm_min_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epi32 // CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_min_epi32(x, y); } +TEST_CONSTEXPR(match_v4si(_mm_min_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), -1, -2, -3, -4 )); + __m128i test_mm_min_epu16(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu16 // CHECK: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_min_epu16(x, y); } +TEST_CONSTEXPR(match_v8hu(_mm_min_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 1, 3, 5, 6, 7, 8, 9, 10)); + __m128i test_mm_min_epu32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_min_epu32 // CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_min_epu32(x, y); } +TEST_CONSTEXPR(match_v4su(_mm_min_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 1, 3, 5, 6)); + __m128i test_mm_minpos_epu16(__m128i x) { // CHECK-LABEL: test_mm_minpos_epu16 // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}}) @@ -351,6 +380,7 @@ __m128i test_mm_mullo_epi32(__m128i x, __m128i y) { // CHECK: mul <4 x i32> return _mm_mullo_epi32(x, y); } +TEST_CONSTEXPR(match_v4si(_mm_mullo_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-16, +14, +12, -10}), -16, -28, +36, +40)); __m128i test_mm_packus_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_packus_epi32 diff --git a/clang/test/CodeGen/X86/sse42-builtins.c b/clang/test/CodeGen/X86/sse42-builtins.c index d0c0cce33e1d..3a1e8fc79303 100644 --- a/clang/test/CodeGen/X86/sse42-builtins.c +++ b/clang/test/CodeGen/X86/sse42-builtins.c @@ -7,8 +7,18 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall 
-Werror | FileCheck %s --check-prefixes=CHECK // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK + #include <immintrin.h> +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -59,6 +69,7 @@ __m128i test_mm_cmpgt_epi64(__m128i A, __m128i B) { // CHECK: icmp sgt <2 x i64> return _mm_cmpgt_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_cmpgt_epi64((__m128i)(__v2di){+1, -8}, (__m128i)(__v2di){-10, -8}), -1, 0)); int test_mm_cmpistra(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpistra diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index ee44868fa56f..56ff73f08ab3 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -7,8 +7,18 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none 
-ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll @@ -17,18 +27,21 @@ __m128i test_mm_abs_epi8(__m128i a) { // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) return _mm_abs_epi8(a); } +TEST_CONSTEXPR(match_v16qi(_mm_abs_epi8((__m128i)(__v16qs){+100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}), 100, 50, 100, 20, 80, 50, 120, 20, 100, 50, 100, 20, 80, 50, 120, 20)); __m128i test_mm_abs_epi16(__m128i a) { // CHECK-LABEL: test_mm_abs_epi16 // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) return _mm_abs_epi16(a); } +TEST_CONSTEXPR(match_v8hi(_mm_abs_epi16((__m128i)(__v8hi){+32000, -32000, +6, -60, +80, -50, +120, -20}), 32000, 32000, 6, 60, 80, 50, 120, 20)); __m128i test_mm_abs_epi32(__m128i a) { // CHECK-LABEL: test_mm_abs_epi32 // CHECK: 
[[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) return _mm_abs_epi32(a); } +TEST_CONSTEXPR(match_v4si(_mm_abs_epi32((__m128i)(__v4si){-5, -1, 0, 1}), 5, 1, 0, 1)); __m128i test_mm_alignr_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_alignr_epi8 diff --git a/clang/test/CodeGen/X86/x86-bswap.c b/clang/test/CodeGen/X86/x86-bswap.c index 589dd8360698..8a5baacf5569 100644 --- a/clang/test/CodeGen/X86/x86-bswap.c +++ b/clang/test/CodeGen/X86/x86-bswap.c @@ -1,45 +1,41 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -no-enable-noundef-analysis -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s + + #include <x86intrin.h> +#include "builtin_test_helpers.h" int test__bswapd(int X) { // CHECK-LABEL: test__bswapd // CHECK: call i32 @llvm.bswap.i32 return __bswapd(X); } +TEST_CONSTEXPR(__bswapd(0x00000000) == 0x00000000); +TEST_CONSTEXPR(__bswapd(0x01020304) == 0x04030201); int test_bswap(int X) { // CHECK-LABEL: test_bswap // CHECK: call i32 @llvm.bswap.i32 return _bswap(X); } +TEST_CONSTEXPR(_bswap(0x00000000) == 0x00000000); +TEST_CONSTEXPR(_bswap(0x10203040) == 0x40302010); long test__bswapq(long long X) { // CHECK-LABEL: test__bswapq // CHECK: call i64 @llvm.bswap.i64 return __bswapq(X); } +TEST_CONSTEXPR(__bswapq(0x0000000000000000ULL) == 0x0000000000000000); +TEST_CONSTEXPR(__bswapq(0x0102030405060708ULL) == 0x0807060504030201); long test_bswap64(long long X) { // CHECK-LABEL: test_bswap64 // CHECK: call i64 @llvm.bswap.i64 return _bswap64(X); } - 
-// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) - -char bswapd_0[__bswapd(0x00000000) == 0x00000000 ? 1 : -1]; -char bswapd_1[__bswapd(0x01020304) == 0x04030201 ? 1 : -1]; - -char bswap_0[_bswap(0x00000000) == 0x00000000 ? 1 : -1]; -char bswap_1[_bswap(0x10203040) == 0x40302010 ? 1 : -1]; - -char bswapq_0[__bswapq(0x0000000000000000ULL) == 0x0000000000000000 ? 1 : -1]; -char bswapq_1[__bswapq(0x0102030405060708ULL) == 0x0807060504030201 ? 1 : -1]; - -char bswap64_0[_bswap64(0x0000000000000000ULL) == 0x0000000000000000 ? 1 : -1]; -char bswap64_1[_bswap64(0x1020304050607080ULL) == 0x8070605040302010 ? 1 : -1]; - -#endif +TEST_CONSTEXPR(_bswap64(0x0000000000000000ULL) == 0x0000000000000000); +TEST_CONSTEXPR(_bswap64(0x1020304050607080ULL) == 0x8070605040302010); diff --git a/clang/test/CodeGen/X86/x86-builtins.c b/clang/test/CodeGen/X86/x86-builtins.c index e503e475db01..37cfe3e242f5 100644 --- a/clang/test/CodeGen/X86/x86-builtins.c +++ b/clang/test/CodeGen/X86/x86-builtins.c @@ -1,39 +1,40 @@ // RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s 
-triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s #include <x86intrin.h> +#include "builtin_test_helpers.h" unsigned int test_castf32_u32 (float __A){ // CHECK-LABEL: test_castf32_u32 // CHECK: %{{.*}} = load i32, ptr %{{.*}}, align 4 return _castf32_u32(__A); } +TEST_CONSTEXPR(_castf32_u32(-0.0f) == 0x80000000); unsigned long long test_castf64_u64 (double __A){ // CHECK-LABEL: test_castf64_u64 // CHECK: %{{.*}} = load i64, ptr %{{.*}}, align 8 return _castf64_u64(__A); } +TEST_CONSTEXPR(_castf64_u64(-0.0) == 0x8000000000000000); float test_castu32_f32 (unsigned int __A){ // CHECK-LABEL: test_castu32_f32 // CHECK: %{{.*}} = load float, ptr %{{.*}}, align 4 return _castu32_f32(__A); } +TEST_CONSTEXPR(_castu32_f32(0x3F800000) == +1.0f); double test_castu64_f64 (unsigned long long __A){ // CHECK-LABEL: test_castu64_f64 // CHECK: %{{.*}} = load double, ptr %{{.*}}, align 8 return _castu64_f64(__A); } - -// Test constexpr handling. -#if defined(__cplusplus) && (__cplusplus >= 201103L) -char cast_f32_u32_0[_castf32_u32(-0.0f) == 0x80000000 ? 1 : -1]; -char cast_u32_f32_0[_castu32_f32(0x3F800000) == +1.0f ? 1 : -1]; - -char castf64_u64_0[_castf64_u64(-0.0) == 0x8000000000000000 ? 1 : -1]; -char castu64_f64_0[_castu64_f64(0xBFF0000000000000ULL) == -1.0 ? 
1 : -1]; -#endif +TEST_CONSTEXPR(_castu64_f64(0xBFF0000000000000ULL) == -1.0); diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c index eb9f832ac9b0..a3cff2c89da1 100644 --- a/clang/test/CodeGen/X86/xop-builtins.c +++ b/clang/test/CodeGen/X86/xop-builtins.c @@ -7,6 +7,15 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include <x86intrin.h> #include "builtin_test_helpers.h" @@ -206,48 +215,56 @@ __m128i test_mm_rot_epi8(__m128i a, __m128i b) { // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_rot_epi8(a, b); } +TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0)); __m128i test_mm_rot_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_rot_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0)); __m128i test_mm_rot_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_rot_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0)); __m128i test_mm_rot_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_rot_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_rot_epi64(a, b); } +TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 
9223372036854775794LL)); __m128i test_mm_roti_epi8(__m128i a) { // CHECK-LABEL: test_mm_roti_epi8 // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1)) return _mm_roti_epi8(a, 1); } +TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 56, -57, 72, -73, 88, -89, 104, -105, 120)); __m128i test_mm_roti_epi16(__m128i a) { // CHECK-LABEL: test_mm_roti_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50)) return _mm_roti_epi16(a, 50); } +TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, -7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17)); __m128i test_mm_roti_epi32(__m128i a) { // CHECK-LABEL: test_mm_roti_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226)) return _mm_roti_epi32(a, -30); } +TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97)); __m128i test_mm_roti_epi64(__m128i a) { // CHECK-LABEL: test_mm_roti_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100)) return _mm_roti_epi64(a, 100); } +TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 51904512, -28311553)); __m128i test_mm_shl_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_shl_epi8 |
