summaryrefslogtreecommitdiff
path: root/llvm/test/Instrumentation
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Instrumentation')
-rw-r--r--llvm/test/Instrumentation/AddressSanitizer/asan-scalable-vector.ll27
-rw-r--r--llvm/test/Instrumentation/BoundsChecking/runtimes.ll4
-rw-r--r--llvm/test/Instrumentation/HWAddressSanitizer/globals.ll3
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll26
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll34
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll168
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll416
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll84
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll204
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll144
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll64
11 files changed, 503 insertions, 671 deletions
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-scalable-vector.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-scalable-vector.ll
new file mode 100644
index 000000000000..6a841f2d399c
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-scalable-vector.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='asan<use-after-scope>' -S | FileCheck %s
+
+define void @test() #1 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CTX_PG:%.*]] = alloca <vscale x 16 x i1>, align 2
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[CTX_PG]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr inttoptr (i64 17592186044416 to ptr), align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: call void @__asan_report_store8(i64 0) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB3]]:
+; CHECK-NEXT: store ptr [[CTX_PG]], ptr null, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %ctx_pg = alloca <vscale x 16 x i1>, align 2
+ call void @llvm.lifetime.start.p0(ptr %ctx_pg)
+ store ptr %ctx_pg, ptr null, align 8
+ ret void
+}
+
+attributes #1 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
index 2006a6db2ef4..84dd51cd3fa2 100644
--- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
@@ -182,7 +182,7 @@ define void @f1(i64 %x) nounwind {
; RT: attributes #[[ATTR0]] = { nounwind }
;.
; TR-NOMERGE: attributes #[[ATTR0]] = { nounwind }
-; TR-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind }
+; TR-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
; TR-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind }
;.
; RT-NOMERGE: attributes #[[ATTR0]] = { nounwind }
@@ -201,7 +201,7 @@ define void @f1(i64 %x) nounwind {
;.
; TR-GUARD: attributes #[[ATTR0]] = { nounwind }
; TR-GUARD: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
-; TR-GUARD: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind }
+; TR-GUARD: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
; TR-GUARD: attributes #[[ATTR3]] = { nomerge noreturn nounwind }
;.
; RT-GUARD: attributes #[[ATTR0]] = { nounwind }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/globals.ll b/llvm/test/Instrumentation/HWAddressSanitizer/globals.ll
index 4c2852391d53..dee92bc206b6 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/globals.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/globals.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64--linux-android29 | FileCheck --check-prefixes=CHECK,CHECK29,NOALLGLOBALS %s
+; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64--linux-android29 -hwasan-static-linking=1 | FileCheck --check-prefixes=CHECK29,STATICLINKING %s
; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64--linux-android30 | FileCheck --check-prefixes=CHECK,CHECK30,NOALLGLOBALS %s
; RUN: opt < %s -S -passes=hwasan -mtriple=riscv64-unknown-elf -hwasan-globals=1 -hwasan-all-globals=1 | FileCheck --check-prefixes=CHECK,CHECK30,ALLGLOBALS %s
@@ -24,6 +25,8 @@
; CHECK: @hwasan.note = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4
+; STATICLINKING-NOT: @hwasan.note = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4
+
; CHECK: @hwasan.dummy.global = private constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.module_ctor), !associated [[NOTE:![0-9]+]]
; CHECK30: @four = alias i32, inttoptr (i64 add (i64 ptrtoint (ptr @four.hwasan to i64), i64 -6052837899185946624) to ptr)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
index 298dc4b2c853..93006ae30f92 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -mattr=+avx10.2-512 -passes=msan -S | FileCheck %s
+; RUN: opt < %s -mattr=+avx10.2 -passes=msan -S | FileCheck %s
; Forked from llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
;
@@ -156,11 +156,9 @@ define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i1> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = or <64 x i1> [[TMP20]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = sext <64 x i1> [[TMP21]] to <64 x i8>
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <32 x i16>
-; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <32 x i16> [[TMP23]], zeroinitializer
-; CHECK-NEXT: [[TMP25:%.*]] = sext <32 x i1> [[TMP24]] to <32 x i16>
-; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i16> [[TMP25]] to i512
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast i512 [[TMP26]] to <16 x i32>
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i32> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP27]], [[TMP4]]
; CHECK-NEXT: [[RES:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -193,11 +191,9 @@ define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %_
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
@@ -239,11 +235,9 @@ define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %_
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
index e3a26ae07ac1..e121c3b6ea17 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -mattr=+avx10.2-256 -passes=msan -S | FileCheck %s
+; RUN: opt < %s -mattr=+avx10.2 -passes=msan -S | FileCheck %s
; Forked from llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
;
@@ -265,11 +265,9 @@ define <4 x i32> @test_mm_mask_dpbssd_epi32(<4 x i32> %__W, i4 zeroext %__U, <4
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -311,11 +309,9 @@ define <4 x i32> @test_mm_maskz_dpbssds_epi32(i4 zeroext %__U, <4 x i32> %__W, <
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -357,11 +353,9 @@ define <8 x i32> @test_mm256_maskz_dpbssds_epi32(<8 x i32> %__W, i8 zeroext %__U
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -403,11 +397,9 @@ define <8 x i32> @test_mm256_mask_dpbssd_epi32(i8 zeroext %__U, <8 x i32> %__W,
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
index 822e546c84bc..216096199fd0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
@@ -20,10 +20,10 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP24]], zeroinitializer
@@ -34,13 +34,11 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[TMP24]], <32 x i8> [[TMP5]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP4]]
;
@@ -68,10 +66,10 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP10]], align 32
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <8 x i32> [[_MSLD]] to <32 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <32 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <32 x i8> [[TMP30]], zeroinitializer
@@ -82,13 +80,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[TMP30]], <32 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[_MSPROP1]], <8 x i32> [[TMP2]]
@@ -97,10 +93,10 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i32> [[TMP17]], <8 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[TMP11]], <8 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i32> [[TMP5]] to <32 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <32 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <32 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <32 x i8> [[TMP39]], zeroinitializer
@@ -111,13 +107,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[TMP39]], <32 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[TMP21]], <8 x i32> [[_MSPROP3]], <8 x i32> zeroinitializer
@@ -151,10 +145,10 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1,
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP24]], zeroinitializer
@@ -165,13 +159,11 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[TMP24]], <16 x i8> [[TMP5]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
@@ -199,10 +191,10 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 16
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[_MSLD]] to <16 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <16 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <16 x i8> [[TMP30]], zeroinitializer
@@ -213,13 +205,11 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[TMP30]], <16 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -230,10 +220,10 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP3]], <4 x i32> [[TMP17]], <4 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP11]], <4 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <16 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <16 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <16 x i8> [[TMP39]], zeroinitializer
@@ -244,13 +234,11 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[TMP39]], <16 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP6:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -286,10 +274,10 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP24]], zeroinitializer
@@ -300,13 +288,11 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[TMP24]], <32 x i8> [[TMP5]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP4]]
;
@@ -334,10 +320,10 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP10]], align 32
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <8 x i32> [[_MSLD]] to <32 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <32 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <32 x i8> [[TMP30]], zeroinitializer
@@ -348,13 +334,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[TMP30]], <32 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[_MSPROP1]], <8 x i32> [[TMP2]]
@@ -363,10 +347,10 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i32> [[TMP17]], <8 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[TMP11]], <8 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i32> [[TMP5]] to <32 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <32 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <32 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <32 x i8> [[TMP39]], zeroinitializer
@@ -377,13 +361,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[TMP39]], <32 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[TMP21]], <8 x i32> [[_MSPROP3]], <8 x i32> zeroinitializer
@@ -417,10 +399,10 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1,
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP24]], zeroinitializer
@@ -431,13 +413,11 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[TMP24]], <16 x i8> [[TMP5]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
@@ -465,10 +445,10 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 16
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[_MSLD]] to <16 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <16 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <16 x i8> [[TMP30]], zeroinitializer
@@ -479,13 +459,11 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[TMP30]], <16 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -496,10 +474,10 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP3]], <4 x i32> [[TMP17]], <4 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP11]], <4 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <16 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <16 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <16 x i8> [[TMP39]], zeroinitializer
@@ -510,13 +488,11 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[TMP39]], <16 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP6:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll
index 38f4272ef106..26b1306e0389 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll
@@ -10,51 +10,45 @@
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
+define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_vpdpbusd_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP4]]
;
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %1
}
-define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) sanitize_memory {
+define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) sanitize_memory {
; CHECK-LABEL: define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <8 x i32> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <32 x i8> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1:![0-9]+]]
@@ -62,32 +56,26 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <8 x i32>, ptr [[X2P]], align 32
+; CHECK-NEXT: [[X2:%.*]] = load <32 x i8>, ptr [[X2P]], align 32
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP10]], align 32
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <8 x i32> [[_MSLD]] to <32 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <32 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <32 x i8>, ptr [[TMP10]], align 32
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <32 x i8> [[TMP30]], zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <32 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <32 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <32 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP38:%.*]] = and <32 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP31]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <32 x i1> [[TMP35]], [[TMP32]]
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[_MSPROP1]], <8 x i32> [[TMP2]]
@@ -96,27 +84,21 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i32> [[TMP17]], <8 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[TMP11]], <8 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i32> [[TMP5]] to <32 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <32 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <32 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <32 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <32 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <32 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <32 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <32 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <32 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <32 x i1> [[TMP37]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i1> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[TMP21]], <8 x i32> [[_MSPROP3]], <8 x i32> zeroinitializer
@@ -132,11 +114,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: store { <8 x i32>, <8 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <8 x i32>, <8 x i32> } [[RES2]]
;
- %x2 = load <8 x i32>, ptr %x2p
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %x2 = load <32 x i8>, ptr %x2p
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
- %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
%res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
@@ -144,51 +126,45 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
ret { <8 x i32>, <8 x i32> } %res2
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
+define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_vpdpbusd_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %1
}
-define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) sanitize_memory {
+define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) sanitize_memory {
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <4 x i32> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <16 x i8> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -196,32 +172,26 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <4 x i32>, ptr [[X2P]], align 16
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i8>, ptr [[X2P]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 16
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[_MSLD]] to <16 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <16 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[TMP10]], align 16
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <16 x i8> [[TMP30]], zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <16 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <16 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP31]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <16 x i1> [[TMP35]], [[TMP32]]
; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -232,27 +202,21 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP2]], <4 x i32> [[TMP17]], <4 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP11]], <4 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <16 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <16 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <16 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <16 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <16 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <16 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <16 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <16 x i1> [[TMP37]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i1> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -270,12 +234,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } [[RES2]]
;
- %x2 = load <4 x i32>, ptr %x2p
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %x2 = load <16 x i8>, ptr %x2p
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
- %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
@@ -284,51 +248,45 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
ret { <4 x i32>, <4 x i32> } %res2
}
-declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
+define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_vpdpbusds_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP4]]
;
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %1
}
-define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) sanitize_memory {
+define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) sanitize_memory {
; CHECK-LABEL: define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <8 x i32> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <32 x i8> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -336,32 +294,26 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <8 x i32>, ptr [[X2P]], align 32
+; CHECK-NEXT: [[X2:%.*]] = load <32 x i8>, ptr [[X2P]], align 32
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP10]], align 32
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <8 x i32> [[_MSLD]] to <32 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <32 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <32 x i8>, ptr [[TMP10]], align 32
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <32 x i8> [[TMP30]], zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <32 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <32 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <32 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP38:%.*]] = and <32 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP31]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <32 x i1> [[TMP35]], [[TMP32]]
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[_MSPROP1]], <8 x i32> [[TMP2]]
@@ -370,27 +322,21 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i32> [[TMP17]], <8 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i32> [[TMP11]], <8 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[X4]] to <32 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i32> [[TMP5]] to <32 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <32 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <32 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <32 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <32 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <32 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <32 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <32 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <32 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <32 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <32 x i1> [[TMP37]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i1> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[TMP21]], <8 x i32> [[_MSPROP3]], <8 x i32> zeroinitializer
@@ -406,11 +352,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: store { <8 x i32>, <8 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <8 x i32>, <8 x i32> } [[RES2]]
;
- %x2 = load <8 x i32>, ptr %x2p
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %x2 = load <32 x i8>, ptr %x2p
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
- %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
%res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
@@ -418,51 +364,45 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
ret { <8 x i32>, <8 x i32> } %res2
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
+define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_vpdpbusds_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %1
}
-define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) sanitize_memory {
+define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) sanitize_memory {
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <4 x i32> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <16 x i8> [[X4:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -470,32 +410,26 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <4 x i32>, ptr [[X2P]], align 16
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i8>, ptr [[X2P]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 16
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[_MSLD]] to <16 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <16 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <16 x i8>, ptr [[TMP10]], align 16
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <16 x i8> [[TMP30]], zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <16 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <16 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP31]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <16 x i1> [[TMP35]], [[TMP32]]
; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -506,27 +440,21 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP2]], <4 x i32> [[TMP17]], <4 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP11]], <4 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <4 x i32> [[X4]] to <16 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <16 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <16 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <16 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <16 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <16 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <16 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <16 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <16 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <16 x i1> [[TMP37]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i1> [[TMP39]], [[TMP41]]
; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -544,12 +472,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } [[RES2]]
;
- %x2 = load <4 x i32>, ptr %x2p
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %x2 = load <16 x i8>, ptr %x2p
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
- %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll
index f146823b90e0..f6410c6799a5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll
@@ -20,10 +20,10 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <64 x i8> [[TMP24]], zeroinitializer
@@ -34,13 +34,11 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[TMP24]], <64 x i8> [[TMP5]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
;
@@ -68,10 +66,10 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP10]], align 64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i32> [[_MSLD]] to <64 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <64 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <64 x i8> [[TMP30]], zeroinitializer
@@ -82,13 +80,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[TMP30]], <64 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP2]]
@@ -97,10 +93,10 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32> [[TMP5]] to <64 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <64 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <64 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <64 x i8> [[TMP39]], zeroinitializer
@@ -111,13 +107,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[TMP39]], <64 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[_MSPROP3]], <16 x i32> zeroinitializer
@@ -151,10 +145,10 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <64 x i8> [[TMP24]], zeroinitializer
@@ -165,13 +159,11 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[TMP24]], <64 x i8> [[TMP5]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
;
@@ -199,10 +191,10 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP10]], align 64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i32> [[_MSLD]] to <64 x i8>
+; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <64 x i8> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <64 x i8> [[TMP30]], zeroinitializer
@@ -213,13 +205,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[TMP30]], <64 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP2]]
@@ -228,10 +218,10 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32> [[TMP5]] to <64 x i8>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <64 x i8> [[TMP41]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <64 x i8> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <64 x i8> [[TMP39]], zeroinitializer
@@ -242,13 +232,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[TMP39]], <64 x i8> [[TMP40]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[_MSPROP3]], <16 x i32> zeroinitializer
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll
index 7c39ff6bb2be..6d4ce6dec519 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll
@@ -10,51 +10,45 @@
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <64 x i8>, <64 x i8>)
-define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) sanitize_memory {
+define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(
-; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-SAME: <16 x i32> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i8> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <64 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <64 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <64 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <64 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[X2]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
;
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
ret <16 x i32> %1
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) sanitize_memory {
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) sanitize_memory {
; CHECK-LABEL: define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(
-; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <16 x i32> [[X4:%.*]], i16 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <16 x i32> [[X0:%.*]], <64 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <64 x i8> [[X4:%.*]], i16 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1:![0-9]+]]
@@ -62,32 +56,26 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P]], align 64
+; CHECK-NEXT: [[TMP31:%.*]] = load <64 x i8>, ptr [[X2P]], align 64
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP10]], align 64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i32> [[_MSLD]] to <64 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <64 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <64 x i8>, ptr [[TMP10]], align 64
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <64 x i8> [[TMP30]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <64 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <64 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <64 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP38:%.*]] = and <64 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP32]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <64 x i1> [[TMP35]], [[TMP37]]
; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP2]]
@@ -96,27 +84,21 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32> [[TMP5]] to <64 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <64 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <64 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <64 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <64 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <64 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <64 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <64 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <64 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <64 x i1> [[TMP41]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i1> [[TMP39]], [[TMP42]]
; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[_MSPROP3]], <16 x i32> zeroinitializer
@@ -132,11 +114,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; CHECK-NEXT: store { <16 x i32>, <16 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <16 x i32>, <16 x i32> } [[RES2]]
;
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %x2 = load <64 x i8>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
- %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
@@ -144,51 +126,45 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
ret { <16 x i32>, <16 x i32> } %res2
}
-declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <64 x i8>, <64 x i8>)
-define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) sanitize_memory {
+define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_vpdpbusds_512(
-; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-SAME: <16 x i32> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP24:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <64 x i8> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <64 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <64 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <64 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[X2]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
;
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
ret <16 x i32> %1
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) sanitize_memory {
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) sanitize_memory {
; CHECK-LABEL: define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(
-; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], ptr [[X2P:%.*]], <16 x i32> [[X4:%.*]], i16 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: <16 x i32> [[X0:%.*]], <64 x i8> [[X1:%.*]], ptr [[X2P:%.*]], <64 x i8> [[X4:%.*]], i16 [[X3:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP33:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP40:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -196,32 +172,26 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P]], align 64
+; CHECK-NEXT: [[TMP31:%.*]] = load <64 x i8>, ptr [[X2P]], align 64
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP10]], align 64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i32> [[X2]] to <64 x i8>
-; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i32> [[_MSLD]] to <64 x i8>
-; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <64 x i8> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = load <64 x i8>, ptr [[TMP10]], align 64
; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <64 x i8> [[TMP30]], zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <64 x i8> [[TMP31]], zeroinitializer
-; CHECK-NEXT: [[TMP38:%.*]] = and <64 x i1> [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP36]], [[TMP35]]
-; CHECK-NEXT: [[TMP59:%.*]] = and <64 x i1> [[TMP34]], [[TMP37]]
+; CHECK-NEXT: [[TMP38:%.*]] = and <64 x i1> [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP32]], [[TMP36]]
+; CHECK-NEXT: [[TMP59:%.*]] = and <64 x i1> [[TMP35]], [[TMP37]]
; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[TMP31]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP2]]
@@ -230,27 +200,21 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> [[X0]]
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32> [[X1]] to <64 x i8>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i32> [[X4]] to <64 x i8>
-; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i32> [[TMP3]] to <64 x i8>
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32> [[TMP5]] to <64 x i8>
-; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <64 x i8> [[TMP41]], zeroinitializer
-; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <64 x i8> [[TMP42]], zeroinitializer
-; CHECK-NEXT: [[TMP45:%.*]] = icmp ne <64 x i8> [[TMP39]], zeroinitializer
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <64 x i8> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <64 x i8> [[TMP40]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = and <64 x i1> [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP48:%.*]] = and <64 x i1> [[TMP45]], [[TMP44]]
-; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i1> [[TMP43]], [[TMP46]]
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne <64 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <64 x i8> [[X4]], zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = and <64 x i1> [[TMP39]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = and <64 x i1> [[TMP41]], [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i1> [[TMP39]], [[TMP42]]
; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <64 x i8> [[X1]], <64 x i8> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[X3]] to <16 x i1>
; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[_MSPROP3]], <16 x i32> zeroinitializer
@@ -266,11 +230,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; CHECK-NEXT: store { <16 x i32>, <16 x i32> } [[TMP28]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret { <16 x i32>, <16 x i32> } [[RES2]]
;
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %x2 = load <64 x i8>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
- %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll
index 678faef20332..1de2a54486e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll
@@ -10,151 +10,127 @@
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
+define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpbusd_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
- %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
+define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpbusd_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
- %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
+define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpbusds_256(
-; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[X2]] to <32 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to <32 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <32 x i8> [[X1]], <32 x i8> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
- %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
+define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpbusds_128(
-; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[X2]] to <16 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i8> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[X1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[X2]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP6]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <16 x i8> [[X1]], <16 x i8> [[X2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
- %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %res
}
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll
index b36d09bfb594..3df0f1df153c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll
@@ -45,11 +45,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, pt
; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP21]], [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP22]] to <16 x i8>
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <8 x i16>
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i16> [[TMP24]], zeroinitializer
-; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i16>
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i16> [[TMP26]] to i128
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast i128 [[TMP27]] to <4 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <4 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP28]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
@@ -66,11 +64,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, pt
; CHECK-NEXT: [[TMP42:%.*]] = or <16 x i1> [[TMP39]], [[TMP40]]
; CHECK-NEXT: [[TMP43:%.*]] = or <16 x i1> [[TMP42]], [[TMP41]]
; CHECK-NEXT: [[TMP44:%.*]] = sext <16 x i1> [[TMP43]] to <16 x i8>
-; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <8 x i16>
-; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i16> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i16>
-; CHECK-NEXT: [[TMP48:%.*]] = bitcast <8 x i16> [[TMP47]] to i128
-; CHECK-NEXT: [[TMP49:%.*]] = bitcast i128 [[TMP48]] to <4 x i32>
+; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <4 x i32>
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <4 x i32> [[TMP45]], zeroinitializer
+; CHECK-NEXT: [[TMP49:%.*]] = sext <4 x i1> [[TMP46]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP49]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]]
@@ -120,11 +116,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, p
; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP21]], [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP22]] to <16 x i8>
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <8 x i16>
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i16> [[TMP24]], zeroinitializer
-; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i16>
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i16> [[TMP26]] to i128
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast i128 [[TMP27]] to <4 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <4 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP28]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8>
@@ -141,11 +135,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, p
; CHECK-NEXT: [[TMP42:%.*]] = or <16 x i1> [[TMP39]], [[TMP40]]
; CHECK-NEXT: [[TMP43:%.*]] = or <16 x i1> [[TMP42]], [[TMP41]]
; CHECK-NEXT: [[TMP44:%.*]] = sext <16 x i1> [[TMP43]] to <16 x i8>
-; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <8 x i16>
-; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i16> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i16>
-; CHECK-NEXT: [[TMP48:%.*]] = bitcast <8 x i16> [[TMP47]] to i128
-; CHECK-NEXT: [[TMP49:%.*]] = bitcast i128 [[TMP48]] to <4 x i32>
+; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <4 x i32>
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <4 x i32> [[TMP45]], zeroinitializer
+; CHECK-NEXT: [[TMP49:%.*]] = sext <4 x i1> [[TMP46]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP49]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]]
@@ -195,11 +187,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, pt
; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8>
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <16 x i16>
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer
-; CHECK-NEXT: [[TMP26:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16>
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i16> [[TMP26]] to i256
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast i256 [[TMP27]] to <8 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <8 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP28:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP28]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
@@ -216,11 +206,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, pt
; CHECK-NEXT: [[TMP42:%.*]] = or <32 x i1> [[TMP39]], [[TMP40]]
; CHECK-NEXT: [[TMP43:%.*]] = or <32 x i1> [[TMP42]], [[TMP41]]
; CHECK-NEXT: [[TMP44:%.*]] = sext <32 x i1> [[TMP43]] to <32 x i8>
-; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <16 x i16>
-; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i16> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = sext <16 x i1> [[TMP46]] to <16 x i16>
-; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x i16> [[TMP47]] to i256
-; CHECK-NEXT: [[TMP49:%.*]] = bitcast i256 [[TMP48]] to <8 x i32>
+; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <8 x i32>
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer
+; CHECK-NEXT: [[TMP49:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP49]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i32> [[_MSPROP1]], [[_MSPROP3]]
@@ -270,11 +258,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, p
; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8>
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <16 x i16>
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer
-; CHECK-NEXT: [[TMP26:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16>
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i16> [[TMP26]] to i256
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast i256 [[TMP27]] to <8 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <8 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP28:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP28]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8>
@@ -291,11 +277,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, p
; CHECK-NEXT: [[TMP42:%.*]] = or <32 x i1> [[TMP39]], [[TMP40]]
; CHECK-NEXT: [[TMP43:%.*]] = or <32 x i1> [[TMP42]], [[TMP41]]
; CHECK-NEXT: [[TMP44:%.*]] = sext <32 x i1> [[TMP43]] to <32 x i8>
-; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <16 x i16>
-; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i16> [[TMP45]], zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = sext <16 x i1> [[TMP46]] to <16 x i16>
-; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x i16> [[TMP47]] to i256
-; CHECK-NEXT: [[TMP49:%.*]] = bitcast i256 [[TMP48]] to <8 x i32>
+; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <8 x i32>
+; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer
+; CHECK-NEXT: [[TMP49:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP49]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i32> [[_MSPROP1]], [[_MSPROP3]]