summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
authorMingming Liu <mingmingl@google.com>2025-09-10 15:25:31 -0700
committerGitHub <noreply@github.com>2025-09-10 15:25:31 -0700
commit1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/X86
parent898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parentb8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format users/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll11
-rw-r--r--llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll39
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-across-func.ll4
-rw-r--r--llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir152
-rw-r--r--llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll6
-rw-r--r--llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll2
-rw-r--r--llvm/test/CodeGen/X86/apx/cf.ll18
-rw-r--r--llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll8
-rw-r--r--llvm/test/CodeGen/X86/avg-mask.ll16
-rw-r--r--llvm/test/CodeGen/X86/avx10.2-fma-commute.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx10_2-cmp.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2bf16-arith.ll25
-rw-r--r--llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-arith.ll42
-rw-r--r--llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll6
-rw-r--r--llvm/test/CodeGen/X86/avx512copy-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll16
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-mov.ll24
-rw-r--r--llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll56
-rw-r--r--llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll42
-rw-r--r--llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll28
-rw-r--r--llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll47
-rw-r--r--llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll24
-rw-r--r--llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll107
-rw-r--r--llvm/test/CodeGen/X86/bswap-inline-asm.ll116
-rw-r--r--llvm/test/CodeGen/X86/call-graph-section.ll6
-rw-r--r--llvm/test/CodeGen/X86/combine-gfni.ll101
-rw-r--r--llvm/test/CodeGen/X86/combine-vpmadd52.ll400
-rw-r--r--llvm/test/CodeGen/X86/comi-flags.ll2
-rw-r--r--llvm/test/CodeGen/X86/evex512-mem.ll29
-rw-r--r--llvm/test/CodeGen/X86/expand-large-fp-optnone.ll252
-rw-r--r--llvm/test/CodeGen/X86/fminimum-fmaximum.ll2
-rw-r--r--llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll2
-rw-r--r--llvm/test/CodeGen/X86/fp16-reload.mir34
-rw-r--r--llvm/test/CodeGen/X86/fp16-spill.ll64
-rw-r--r--llvm/test/CodeGen/X86/fpenv.ll238
-rw-r--r--llvm/test/CodeGen/X86/freeze.ll45
-rw-r--r--llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll580
-rw-r--r--llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll6
-rw-r--r--llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll40
-rw-r--r--llvm/test/CodeGen/X86/isel-ceil.ll95
-rw-r--r--llvm/test/CodeGen/X86/isel-floor.ll95
-rw-r--r--llvm/test/CodeGen/X86/isel-ftrunc.ll95
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.acos.ll (renamed from llvm/test/CodeGen/X86/llvm.acos.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.asin.ll (renamed from llvm/test/CodeGen/X86/llvm.asin.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.atan.ll (renamed from llvm/test/CodeGen/X86/llvm.atan.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.atan2.ll (renamed from llvm/test/CodeGen/X86/llvm.atan2.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.cos.ll (renamed from llvm/test/CodeGen/X86/llvm.cos.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.cosh.ll (renamed from llvm/test/CodeGen/X86/llvm.cosh.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll294
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.sin.ll (renamed from llvm/test/CodeGen/X86/llvm.sin.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.sincos.ll (renamed from llvm/test/CodeGen/X86/llvm.sincos.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.sinh.ll (renamed from llvm/test/CodeGen/X86/llvm.sinh.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.tan.ll (renamed from llvm/test/CodeGen/X86/llvm.tan.ll)0
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.tanh.ll (renamed from llvm/test/CodeGen/X86/llvm.tanh.ll)0
-rw-r--r--llvm/test/CodeGen/X86/kmov.ll51
-rw-r--r--llvm/test/CodeGen/X86/llrint-conv.ll256
-rw-r--r--llvm/test/CodeGen/X86/llround-conv.ll404
-rw-r--r--llvm/test/CodeGen/X86/lrint-conv-i32.ll259
-rw-r--r--llvm/test/CodeGen/X86/lrint-conv-i64.ll317
-rw-r--r--llvm/test/CodeGen/X86/lround-conv-i32.ll176
-rw-r--r--llvm/test/CodeGen/X86/lround-conv-i64.ll190
-rw-r--r--llvm/test/CodeGen/X86/lvi-hardening-ret.ll12
-rw-r--r--llvm/test/CodeGen/X86/midpoint-int-vec-256.ll276
-rw-r--r--llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll2
-rw-r--r--llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll17
-rw-r--r--llvm/test/CodeGen/X86/opt-pipeline.ll2
-rw-r--r--llvm/test/CodeGen/X86/peep-test-5.ll51
-rw-r--r--llvm/test/CodeGen/X86/pr156256.ll25
-rw-r--r--llvm/test/CodeGen/X86/pr156817.ll23
-rw-r--r--llvm/test/CodeGen/X86/pr38795.ll7
-rw-r--r--llvm/test/CodeGen/X86/pr40289-64bit.ll2
-rw-r--r--llvm/test/CodeGen/X86/pr40289.ll2
-rw-r--r--llvm/test/CodeGen/X86/pr67333.ll20
-rw-r--r--llvm/test/CodeGen/X86/pr90844.ll36
-rw-r--r--llvm/test/CodeGen/X86/shift-i128.ll203
-rw-r--r--llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll4
-rw-r--r--llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll40
-rw-r--r--llvm/test/CodeGen/X86/vec-strict-128-fp16.ll4
-rw-r--r--llvm/test/CodeGen/X86/vector-fshl-128.ll3
-rw-r--r--llvm/test/CodeGen/X86/vector-fshl-256.ll229
-rw-r--r--llvm/test/CodeGen/X86/vector-fshr-128.ll3
-rw-r--r--llvm/test/CodeGen/X86/vector-fshr-256.ll172
-rw-r--r--llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll66
-rw-r--r--llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir131
-rw-r--r--llvm/test/CodeGen/X86/win64-eh-unwindv2.ll37
-rw-r--r--llvm/test/CodeGen/X86/xor-not-combine.ll29
107 files changed, 4894 insertions, 1410 deletions
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 8d690ba06e3b..654169377609 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -13,25 +13,24 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movb $1, %cl
+; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb.i5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: addl %ebx, %ebx
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %mp_unexp_mp2d.exit.i
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.5: # %cond_next.i
-; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: jne .LBB0_3
; CHECK-NEXT: # %bb.6: # %cond_next36.i
; CHECK-NEXT: movl $0, 0
-; CHECK-NEXT: movzbl %al, %ebp
+; CHECK-NEXT: movzbl %cl, %ebp
; CHECK-NEXT: andl $1, %ebp
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index bf939c413108..3913e93b83a6 100644
--- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -38,7 +38,6 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: ## %bb.1: ## %bb116.i
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.2: ## %bb52.i.i
-; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.3: ## %bb142.i
; CHECK-NEXT: je LBB0_25
@@ -49,23 +48,23 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: jmp LBB0_5
; CHECK-NEXT: LBB0_21: ## %bb7806
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp16:
+; CHECK-NEXT: Ltmp16: ## EH_LABEL
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN12wxStringBase6appendEmw
-; CHECK-NEXT: Ltmp17:
+; CHECK-NEXT: Ltmp17: ## EH_LABEL
; CHECK-NEXT: LBB0_5: ## %bb3261
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl $37, 0
; CHECK-NEXT: jne LBB0_25
; CHECK-NEXT: ## %bb.6: ## %bb3306
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: Ltmp0: ## EH_LABEL
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN12wxStringBaseaSEPKw
-; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: Ltmp1: ## EH_LABEL
; CHECK-NEXT: ## %bb.7: ## %bb3314
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: movl 0, %eax
@@ -89,11 +88,11 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: ## %bb.13: ## %bb155.i8541
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: Ltmp4: ## EH_LABEL
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll _gmtime_r
-; CHECK-NEXT: Ltmp5:
+; CHECK-NEXT: Ltmp5: ## EH_LABEL
; CHECK-NEXT: LBB0_14: ## %bb182.i8560
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: testb $1, %bl
@@ -103,7 +102,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: je LBB0_18
; CHECK-NEXT: ## %bb.17: ## %bb440.i8663
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp6:
+; CHECK-NEXT: Ltmp6: ## EH_LABEL
; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl L_.str33$non_lazy_ptr, %eax
@@ -113,47 +112,47 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: movl %ebp, (%esp)
; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5
; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_
-; CHECK-NEXT: Ltmp7:
+; CHECK-NEXT: Ltmp7: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_18
; CHECK-NEXT: LBB0_15: ## %bb187.i8591
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: jne LBB0_25
; CHECK-NEXT: LBB0_18: ## %invcont5814
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp8:
+; CHECK-NEXT: Ltmp8: ## EH_LABEL
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
; CHECK-NEXT: subl $4, %esp
-; CHECK-NEXT: Ltmp9:
+; CHECK-NEXT: Ltmp9: ## EH_LABEL
; CHECK-NEXT: ## %bb.19: ## %invcont5831
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: Ltmp10:
+; CHECK-NEXT: Ltmp10: ## EH_LABEL
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm
-; CHECK-NEXT: Ltmp11:
+; CHECK-NEXT: Ltmp11: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_5
; CHECK-NEXT: LBB0_9: ## %bb5657
-; CHECK-NEXT: Ltmp13:
+; CHECK-NEXT: Ltmp13: ## EH_LABEL
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE
-; CHECK-NEXT: Ltmp14:
+; CHECK-NEXT: Ltmp14: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_20: ## %bb5968
-; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: Ltmp2: ## EH_LABEL
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz
; CHECK-NEXT: subl $4, %esp
-; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: Ltmp3: ## EH_LABEL
; CHECK-NEXT: LBB0_25: ## %bb115.critedge.i
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: addl $28, %esp
@@ -163,13 +162,13 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4
; CHECK-NEXT: LBB0_23: ## %lpad.loopexit.split-lp
-; CHECK-NEXT: Ltmp15:
+; CHECK-NEXT: Ltmp15: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_24: ## %lpad8185
-; CHECK-NEXT: Ltmp12:
+; CHECK-NEXT: Ltmp12: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_22: ## %lpad.loopexit
-; CHECK-NEXT: Ltmp18:
+; CHECK-NEXT: Ltmp18: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: Lfunc_end0:
entry:
diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
index 320c96535abb..2bda8db04029 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
@@ -139,12 +139,12 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; O0-NEXT: callq foo
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
+; O0-NEXT: movl $32, %esi
+; O0-NEXT: movl $buf+2048, %edx
; O0-NEXT: # implicit-def: $al
; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: movl $32, %esi
-; O0-NEXT: movl $buf+2048, %edx
; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
; O0-NEXT: movl $64, %esi
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
diff --git a/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
new file mode 100644
index 000000000000..82049dce8a45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
@@ -0,0 +1,152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass="fastpretileconfig,regallocfast,fasttileconfig" -verify-machineinstrs -o - %s | FileCheck %s
+
+# Test to verify that ldtilecfg instructions are sunk closer to tile-defining
+# instructions after a call. This ensures the call does not overwrite values in
+# registers being used for configuring the AMX tile.
+
+...
+---
+name: test_api
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 1, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 2, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 3, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 4, class: tile, preferred-register: '', flags: [ ] }
+ - { id: 5, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 6, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 9, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 10, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 13, class: tile, preferred-register: '', flags: [ ] }
+ - { id: 14, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 15, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 18, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 19, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 22, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 23, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 24, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 25, class: tile, preferred-register: '', flags: [ ] }
+ - { id: 26, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 29, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 30, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 33, class: tile, preferred-register: '', flags: [ ] }
+ - { id: 34, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 35, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 38, class: gr64_nosp, preferred-register: '', flags: [ ] }
+ - { id: 39, class: gr64, preferred-register: '', flags: [ ] }
+ - { id: 40, class: gr16, preferred-register: '', flags: [ ] }
+ - { id: 41, class: gr16, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%2' }
+frameInfo:
+ adjustsStack: true
+ maxAlignment: 1024
+stack:
+ - { id: 0, size: 1024, alignment: 1024 }
+ - { id: 1, size: 1024, alignment: 1024 }
+ - { id: 2, size: 32, alignment: 32 }
+ - { id: 3, size: 32, alignment: 32 }
+ - { id: 4, size: 8, alignment: 8 }
+machineFunctionInfo:
+ amxProgModel: ManagedRA
+body: |
+ bb.0.entry:
+ liveins: $rdi, $rsi
+
+ ; CHECK-LABEL: name: test_api
+ ; CHECK: liveins: $rdi, $rsi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $zmm0 = AVX512_512_SET0
+ ; CHECK-NEXT: VMOVUPSZmr %stack.5, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.5, align 4)
+ ; CHECK-NEXT: MOV8mi %stack.5, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.5, align 4)
+ ; CHECK-NEXT: MOV64mr %stack.8, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.8)
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 16
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.2, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: renamable $cx = MOV16ri 16
+ ; CHECK-NEXT: MOV16mr %stack.7, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.7)
+ ; CHECK-NEXT: renamable $ax = MOV16ri 2
+ ; CHECK-NEXT: MOV16mr %stack.6, 1, $noreg, 0, $noreg, $ax :: (store (s16) into %stack.6)
+ ; CHECK-NEXT: $al = IMPLICIT_DEF
+ ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
+ ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
+ ; CHECK-NEXT: $al = IMPLICIT_DEF
+ ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
+ ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
+ ; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4)
+ ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 64
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 64
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
+ ; CHECK-NEXT: renamable $rdx = MOV32ri64 16
+ ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdi, 1, killed renamable $rdx, 0, $noreg, killed renamable $tmm0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
+ ; CHECK-NEXT: $rsi = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8)
+ ; CHECK-NEXT: $cx = MOV16rm %stack.7, 1, $noreg, 0, $noreg :: (load (s16) from %stack.7)
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: renamable $rdx = COPY $rax
+ ; CHECK-NEXT: $ax = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6)
+ ; CHECK-NEXT: MOV64mr killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $rdx
+ ; CHECK-NEXT: renamable $rdx = MOV64rm %stack.4, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 16
+ ; CHECK-NEXT: $al = IMPLICIT_DEF
+ ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
+ ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
+ ; CHECK-NEXT: $al = IMPLICIT_DEF
+ ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4)
+ ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4)
+ ; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4)
+ ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 64
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 64
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg
+ ; CHECK-NEXT: renamable $rsi = MOV32ri64 16
+ ; CHECK-NEXT: renamable $rdx = LEA64r %stack.4, 1, $noreg, 0, $noreg
+ ; CHECK-NEXT: PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0
+ ; CHECK-NEXT: RET64
+ %2:gr64 = COPY $rsi
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY killed %0
+ %3:gr64 = COPY killed %2
+ %38:gr64_nosp = MOV32ri64 16
+ %39:gr64 = LEA64r %stack.2, 1, $noreg, 0, $noreg
+ %40:gr16 = MOV16ri 16
+ %41:gr16 = MOV16ri 2
+ %33:tile = PTILELOADDV %41:gr16, %40:gr16, killed %39, 1, killed %38, 0, $noreg
+ %34:gr64_nosp = MOV32ri64 64
+ %35:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg
+ PTILESTOREDV %41:gr16, %40:gr16, killed %35, 1, killed %34, 0, $noreg, %33
+ %29:gr64_nosp = MOV32ri64 64
+ %30:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg
+ %25:tile = PTILELOADDV %41:gr16, %40:gr16, killed %30, 1, killed %29, 0, $noreg
+ %26:gr64_nosp = MOV32ri64 16
+ PTILESTOREDV %41:gr16, %40:gr16, %1, 1, killed %26, 0, $noreg, %25
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ %24:gr64 = COPY $rax
+ MOV64mr %3, 1, $noreg, 0, $noreg, %24
+ %22:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg
+ %19:gr64_nosp = MOV32ri64 16
+ %13:tile = PTILELOADDV %41:gr16, %40:gr16, %22, 1, killed %19, 0, $noreg
+ %14:gr64_nosp = MOV32ri64 64
+ %15:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg
+ PTILESTOREDV %41:gr16, %40:gr16, killed %15, 1, killed %14, 0, $noreg, %13
+ %9:gr64_nosp = MOV32ri64 64
+ %10:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg
+ %4:tile = PTILELOADDV %41:gr16, %40:gr16, killed %10, 1, killed %9, 0, $noreg
+ %5:gr64_nosp = MOV32ri64 16
+ %6:gr64 = LEA64r %stack.4, 1, $noreg, 0, $noreg
+ PTILESTOREDV %41:gr16, %40:gr16, killed %6, 1, killed %5, 0, $noreg, %4
+ RET64
+...
diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
index 71f8f231747f..885bc805d655 100644
--- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
+++ b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
@buf = dso_local global [3072 x i8] zeroinitializer, align 64
diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
index 8f82bd2587ec..41208d6adb30 100644
--- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2 | FileCheck %s
define <16 x float> @test_tcvtrowd2ps(i32 %A) {
; CHECK-LABEL: test_tcvtrowd2ps:
diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
index fd3925fabc51..dc8252ae7aca 100644
--- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
+++ b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2, \
; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) {
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index e52ce6ca815b..b2651e91134e 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -229,3 +229,21 @@ entry:
call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %p, i32 1, <1 x i1> %1)
ret void
}
+
+define i64 @redundant_test(i64 %num, ptr %p1, i64 %in) {
+; CHECK-LABEL: redundant_test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testl $-32, %edi
+; CHECK-NEXT: cfcmoveq (%rsi), %rax
+; CHECK-NEXT: {nf} addq %rdx, %rax
+; CHECK-NEXT: cmovneq %rdi, %rax
+; CHECK-NEXT: retq
+ %and = and i64 %num, 4294967264
+ %cmp = icmp eq i64 %and, 0
+ %mask = bitcast i1 %cmp to <1 x i1>
+ %condload = tail call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %p1, i32 8, <1 x i1> %mask, <1 x i64> poison)
+ %v = bitcast <1 x i64> %condload to i64
+ %add = add i64 %v, %in
+ %sel = select i1 %cmp, i64 %add, i64 %num
+ ret i64 %sel
+}
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll
index ad24608d338a..d6d4db350910 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll
@@ -81,7 +81,7 @@ define i32 @csr6_alloc16(ptr %argv) {
; LIN-NEXT: .cfi_def_cfa_offset 32
; LIN-NEXT: pop2 %rbp, %r15
; LIN-NEXT: .cfi_def_cfa_offset 16
-; LIN-NEXT: popq %rcx
+; LIN-NEXT: popq %rax
; LIN-NEXT: .cfi_def_cfa_offset 8
; LIN-NEXT: retq
;
@@ -116,7 +116,7 @@ define i32 @csr6_alloc16(ptr %argv) {
; LIN-PPX-NEXT: .cfi_def_cfa_offset 32
; LIN-PPX-NEXT: pop2p %rbp, %r15
; LIN-PPX-NEXT: .cfi_def_cfa_offset 16
-; LIN-PPX-NEXT: popq %rcx
+; LIN-PPX-NEXT: popq %rax
; LIN-PPX-NEXT: .cfi_def_cfa_offset 8
; LIN-PPX-NEXT: retq
;
@@ -180,7 +180,7 @@ define i32 @csr6_alloc16(ptr %argv) {
; WIN-NEXT: pop2 %rbp, %rbx
; WIN-NEXT: pop2 %r13, %r12
; WIN-NEXT: pop2 %r15, %r14
-; WIN-NEXT: popq %rcx
+; WIN-NEXT: popq %rax
; WIN-NEXT: .seh_endepilogue
; WIN-NEXT: retq
; WIN-NEXT: .seh_endproc
@@ -211,7 +211,7 @@ define i32 @csr6_alloc16(ptr %argv) {
; WIN-PPX-NEXT: pop2p %rbp, %rbx
; WIN-PPX-NEXT: pop2p %r13, %r12
; WIN-PPX-NEXT: pop2p %r15, %r14
-; WIN-PPX-NEXT: popq %rcx
+; WIN-PPX-NEXT: popq %rax
; WIN-PPX-NEXT: .seh_endepilogue
; WIN-PPX-NEXT: retq
; WIN-PPX-NEXT: .seh_endproc
diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll
index e8866393e8b6..b148cd3d42df 100644
--- a/llvm/test/CodeGen/X86/avg-mask.ll
+++ b/llvm/test/CodeGen/X86/avg-mask.ll
@@ -177,11 +177,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
-; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
@@ -364,11 +364,11 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
-; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
index ab8ac4fbd419..b43b1f7b9c32 100644
--- a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
+++ b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 | FileCheck %s
+; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2 | FileCheck %s
define <8 x bfloat> @fma_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
; CHECK-LABEL: fma_123_v8bf16:
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
index 0f90f1a0a356..566ce533683f 100644
--- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefix=X86
define i1 @hoeq(half %x, half %y) {
; X64-LABEL: hoeq:
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
index c22a394e6c4e..79849a7153c9 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define <32 x bfloat> @test_int_x86_avx10_vaddbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_vaddbf16512:
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
index cbac76e9de27..9225bd88b089 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
declare <32 x bfloat> @llvm.x86.avx10.vminbf16512(<32 x bfloat>, <32 x bfloat>)
diff --git a/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll
index c4a904cc3bc4..cc87ae0aad1f 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define <32 x half> @test_int_x86_avx10_vcvt2ps2phx512(<16 x float> %A, <16 x float> %B) {
; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx512:
diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
index d7ad7b048c6d..c50da22193b2 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
; VCVTTPD2DQS
define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
diff --git a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
index b7713128f472..c27ee1680dea 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X86
define <32 x bfloat> @test_int_x86_avx10_vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B) nounwind {
; X64-LABEL: test_int_x86_avx10_vminmaxbf16512:
diff --git a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
index b2e7caa15944..09eb53faaaad 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; VNNI FP16
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll
index 8430b2e1c028..2e69b41d282b 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define dso_local <8 x i64> @test_mm512_ipcvtbf16_epi8(<32 x bfloat> noundef %__A) {
; CHECK-LABEL: test_mm512_ipcvtbf16_epi8:
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
index 652c35c77709..591349aabef4 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) {
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
index 435f67a0f1e4..0f2c75b15d5b 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define <16 x bfloat> @test_int_x86_avx10_add_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
; CHECK-LABEL: test_int_x86_avx10_add_bf16_256:
@@ -1168,23 +1168,10 @@ entry:
}
define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
-; X64-LABEL: addv:
-; X64: # %bb.0:
-; X64-NEXT: vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
-; X64-NEXT: vaddbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
-; X64-NEXT: retq # encoding: [0xc3]
-;
-; X86-LABEL: addv:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp # encoding: [0x55]
-; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
-; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
-; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
-; X86-NEXT: vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
-; X86-NEXT: vaddbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
-; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
-; X86-NEXT: popl %ebp # encoding: [0x5d]
-; X86-NEXT: retl # encoding: [0xc3]
+; CHECK-LABEL: addv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%add = fadd <32 x bfloat> %a, %b
ret <32 x bfloat> %add
}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
index ba32b2adc799..3efc8cc3d129 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
declare <16 x bfloat> @llvm.x86.avx10.vminbf16256(<16 x bfloat>, <16 x bfloat>)
diff --git a/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll
index 90e2146cc2c0..04c93eb1ee6d 100644
--- a/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define <8 x half> @test_int_x86_avx10_vcvt2ps2phx128(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx128:
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index a2f167e94cc2..e0c2139b5e37 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
;
; 32-bit float to signed integer
diff --git a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
index 916d439ab77f..8ae5b670764e 100644
--- a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X86
define <8 x bfloat> @test_int_x86_avx10_vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B) nounwind {
; X64-LABEL: test_int_x86_avx10_vminmaxbf16128:
diff --git a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
index ed5ae01448c5..0c5fd3bf9d24 100644
--- a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; VNNI FP16
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll
index 957523f87b7c..094637270503 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define dso_local <2 x i64> @test_mm_ipcvtbf16_epi8(<8 x bfloat> noundef %__A) {
; CHECK-LABEL: test_mm_ipcvtbf16_epi8:
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
index e9b739074b45..38d54cff6dc2 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86
define i32 @test_x86_avx512_vcvttsd2usis(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttsd2usis:
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
index f5be929bc85c..c853da5d2168 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s
define i64 @test_x86_avx512_vcvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttsd2si64:
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
index 33819c9e0102..97ca0d88b7d4 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,EVEX512
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s --check-prefixes=CHECK,EVEX256
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK
; 256-bit
@@ -237,32 +236,19 @@ define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
}
define i16 @PR90356(<16 x i1> %a) {
-; EVEX512-LABEL: PR90356:
-; EVEX512: # %bb.0:
-; EVEX512-NEXT: vpsllw $7, %xmm0, %xmm0
-; EVEX512-NEXT: vpmovb2m %xmm0, %k1
-; EVEX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; EVEX512-NEXT: movb $63, %al
-; EVEX512-NEXT: kmovd %eax, %k1
-; EVEX512-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
-; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
-; EVEX512-NEXT: kmovd %k0, %eax
-; EVEX512-NEXT: # kill: def $ax killed $ax killed $eax
-; EVEX512-NEXT: vzeroupper
-; EVEX512-NEXT: retq
-;
-; EVEX256-LABEL: PR90356:
-; EVEX256: # %bb.0:
-; EVEX256-NEXT: vpsllw $7, %xmm0, %xmm0
-; EVEX256-NEXT: vpmovb2m %xmm0, %k0
-; EVEX256-NEXT: vpmovm2w %k0, %ymm0
-; EVEX256-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; EVEX256-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; EVEX256-NEXT: vpmovw2m %ymm0, %k0
-; EVEX256-NEXT: kmovd %k0, %eax
-; EVEX256-NEXT: # kill: def $ax killed $ax killed $eax
-; EVEX256-NEXT: vzeroupper
-; EVEX256-NEXT: retq
+; CHECK-LABEL: PR90356:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
+; CHECK-NEXT: vpmovb2m %xmm0, %k1
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT: movb $63, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%1 = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
%2 = bitcast <16 x i1> %1 to i16
ret i16 %2
diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
index e449c7192e4b..b60d7a5463d6 100644
--- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
@@ -278,14 +278,14 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
ret <4 x float> %res
}
-define <4 x float> @PR98306() {
+define <4 x float> @PR98306(i8 %m) {
; CHECK-LABEL: PR98306:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
+ %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 %m, i32 4)
ret <4 x float> %res
}
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
index a2af7df44010..d09807e4a334 100644
--- a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX102
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll b/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll
index 56d923d7c4cf..4a5c1fe5a2a0 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll
@@ -57,7 +57,7 @@ define <8 x half> @minsh(<8 x half> %va, ptr %pb) {
; CHECK-LABEL: minsh:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh (%rdi), %xmm0, %xmm1
-; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%a = extractelement <8 x half> %va, i32 0
%b = load half, ptr %pb
@@ -70,7 +70,7 @@ define <8 x half> @maxsh(<8 x half> %va, ptr %pb) {
; CHECK-LABEL: maxsh:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh (%rdi), %xmm0, %xmm1
-; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%a = extractelement <8 x half> %va, i32 0
%b = load half, ptr %pb
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index 627a94799424..44ea3ce64ccf 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -1361,3 +1361,19 @@ define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind
%res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <32 x half> %res
}
+
+define <8 x half> @PR153570(ptr %p) {
+; CHECK-LABEL: PR153570:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; CHECK-NEXT: vmulsh {rn-sae}, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %xmm1, (%rdi)
+; CHECK-NEXT: retq
+ %r = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 0, i32 8)
+ store <8 x half> %r, ptr %p, align 16
+ %r1 = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 1, i32 8)
+ ret <8 x half> %r1
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 526511c85045..316e3f27a0a1 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -303,7 +303,7 @@ define <8 x half> @test14(half %x) {
; X64-LABEL: test14:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64-NEXT: retq
;
; X86-LABEL: test14:
@@ -318,7 +318,7 @@ define <16 x half> @test14b(half %x) {
; X64VL-LABEL: test14b:
; X64VL: # %bb.0:
; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64VL-NEXT: retq
;
; X86-LABEL: test14b:
@@ -329,7 +329,7 @@ define <16 x half> @test14b(half %x) {
; X64-NOVL-LABEL: test14b:
; X64-NOVL: # %bb.0:
; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NOVL-NEXT: retq
@@ -341,7 +341,7 @@ define <32 x half> @test14c(half %x) {
; X64VL-LABEL: test14c:
; X64VL: # %bb.0:
; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64VL-NEXT: retq
;
; X86-LABEL: test14c:
@@ -352,7 +352,7 @@ define <32 x half> @test14c(half %x) {
; X64-NOVL-LABEL: test14c:
; X64-NOVL: # %bb.0:
; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64-NOVL-NEXT: retq
@@ -1464,21 +1464,21 @@ define <8 x half> @movsh(<8 x half> %a, <8 x half> %b) {
; X64VL-LABEL: movsh:
; X64VL: # %bb.0:
; X64VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11]
-; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64VL-NEXT: vaddph %xmm0, %xmm2, %xmm0
; X64VL-NEXT: retq
;
; X86-LABEL: movsh:
; X86: # %bb.0:
; X86-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11]
-; X86-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X86-NEXT: vaddph %xmm0, %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-NOVL-LABEL: movsh:
; X64-NOVL: # %bb.0:
; X64-NOVL-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11]
-; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm3
+; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm3 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X64-NOVL-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NOVL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NOVL-NEXT: vaddsh %xmm4, %xmm5, %xmm4
@@ -2311,7 +2311,7 @@ define <8 x half> @test21(half %a, half %b, half %c) nounwind {
; X64-LABEL: test21:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X64-NEXT: vmovsh %xmm2, %xmm3, %xmm2
+; X64-NEXT: vmovsh {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
; X64-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],zero,zero
; X64-NEXT: retq
@@ -2427,7 +2427,7 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
; X64VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X64VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X64VL-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; X64VL-NEXT: retq
;
; X86-LABEL: pr52561:
@@ -2443,7 +2443,7 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
@@ -2474,7 +2474,7 @@ define <8 x i16> @pr59628_xmm(i16 %arg) {
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: vpbroadcastw %eax, %xmm1
-; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
index 7613c9ff43e2..b8ebe2a4890a 100644
--- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
@@ -2,18 +2,18 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %1
}
-define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
+define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
@@ -33,11 +33,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <8 x i32>, ptr %x2p
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %x2 = load <32 x i8>, ptr %x2p
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
- %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
%res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
@@ -45,18 +45,18 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
ret { <8 x i32>, <8 x i32> } %res2
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %1
}
-define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
+define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
@@ -76,12 +76,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <4 x i32>, ptr %x2p
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %x2 = load <16 x i8>, ptr %x2p
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
- %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
@@ -90,18 +90,18 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
ret { <4 x i32>, <4 x i32> } %res2
}
-declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %1
}
-define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
+define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
@@ -121,11 +121,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <8 x i32>, ptr %x2p
- %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %x2 = load <32 x i8>, ptr %x2p
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
- %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
%res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
@@ -133,18 +133,18 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
ret { <8 x i32>, <8 x i32> } %res2
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %1
}
-define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
+define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
@@ -164,12 +164,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32
; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <4 x i32>, ptr %x2p
- %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %x2 = load <16 x i8>, ptr %x2p
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
%2 = bitcast i8 %x3 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
- %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4)
%5 = bitcast i8 %x3 to <8 x i1>
%extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll
index 62c4d39e8615..63ff88a7fa4a 100644
--- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll
@@ -2,20 +2,31 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>) ; unmasked form: three <16 x i32> operands, matching the call below
define <16 x i32>@test_int_x86_avx512_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
-; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_maskz_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
+; X86-LABEL: test_int_x86_avx512_maskz_vpdpbusd_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
@@ -25,7 +36,7 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
-; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
+; X64-LABEL: test_int_x86_avx512_maskz_vpdpbusd_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
@@ -41,20 +52,31 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
ret { <16 x i32>, <16 x i32> } %res3
}
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>) ; unmasked form: three <16 x i32> operands, matching the call below
define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
-; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_maskz_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
+; X86-LABEL: test_int_x86_avx512_maskz_vpdpbusds_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
@@ -64,7 +86,7 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
-; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
+; X64-LABEL: test_int_x86_avx512_maskz_vpdpbusds_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll
index 21d0010ff630..60d0298e057f 100644
--- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll
@@ -2,18 +2,18 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <64 x i8>, <64 x i8>)
-define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
+define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_ask_vpdpbusd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
ret <16 x i32> %1
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
@@ -32,11 +32,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %x2 = load <64 x i8>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
- %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
@@ -44,18 +44,18 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i
ret { <16 x i32>, <16 x i32> } %res2
}
-declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <64 x i8>, <64 x i8>)
-define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
+define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
ret <16 x i32> %1
}
-define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
+define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
@@ -74,11 +74,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x
; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %x2 = load <64 x i8>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
- %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
diff --git a/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll
new file mode 100644
index 000000000000..0f4a4f27b971
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s
+
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx_vpdpbusd_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
+
+define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx_vpdpbusd_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx_vpdpbusds_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+
+define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+; CHECK-LABEL: test_int_x86_avx_vpdpbusds_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll
index a1db6e54fa79..de8b2a41bf8c 100644
--- a/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll
@@ -4,9 +4,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVXVNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVX512VNNI
-declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_256:
; AVXVNNI: # %bb.0:
; AVXVNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2]
@@ -16,13 +16,13 @@ define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8
; AVX512VNNI: # %bb.0:
; AVX512VNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x50,0xc2]
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_128:
; AVXVNNI: # %bb.0:
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2]
@@ -32,13 +32,13 @@ define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4
; AVX512VNNI: # %bb.0:
; AVX512VNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x50,0xc2]
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>)
-define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
+define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_256:
; AVXVNNI: # %bb.0:
; AVXVNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2]
@@ -48,13 +48,13 @@ define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8
; AVX512VNNI: # %bb.0:
; AVX512VNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x51,0xc2]
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2)
ret <8 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>)
-define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
+define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_128:
; AVXVNNI: # %bb.0:
; AVXVNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2]
@@ -64,7 +64,7 @@ define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4
; AVX512VNNI: # %bb.0:
; AVX512VNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x51,0xc2]
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2)
ret <4 x i32> %res
}
diff --git a/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll b/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll
index 8601d454215a..abdc296ae1e1 100644
--- a/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefix=AVX10
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefix=AVX10
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefix=AVX10
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefix=AVX10
define <4 x i32> @test_int_x86_avx2_vpdpwsud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_128:
diff --git a/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll b/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll
index 607720fbc3f3..0ddd0171a58a 100644
--- a/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X64
declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 423f2c49e70e..474be4465d9b 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -654,3 +654,110 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
%2 = sext <64 x i1> %1 to <64 x i8>
ret <64 x i8> %2
}
+
+define <8 x i32> @PR157382(ptr %p0, ptr %p1, ptr %p2) {
+; SSE2-SSSE3-LABEL: PR157382:
+; SSE2-SSSE3: # %bb.0:
+; SSE2-SSSE3-NEXT: movdqu (%rdi), %xmm3
+; SSE2-SSSE3-NEXT: movdqu 16(%rdi), %xmm2
+; SSE2-SSSE3-NEXT: movdqu (%rsi), %xmm0
+; SSE2-SSSE3-NEXT: movdqu 16(%rsi), %xmm4
+; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm6
+; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm8
+; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm8
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm0
+; SSE2-SSSE3-NEXT: por %xmm6, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm4
+; SSE2-SSSE3-NEXT: por %xmm8, %xmm4
+; SSE2-SSSE3-NEXT: packssdw %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-SSSE3-NEXT: pcmpeqb %xmm5, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm1
+; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
+; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-SSSE3-NEXT: pslld $31, %xmm1
+; SSE2-SSSE3-NEXT: psrad $31, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX1-LABEL: PR157382:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqu (%rdi), %ymm0
+; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpcmpgtd %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm5
+; AVX1-NEXT: vmovdqu (%rsi), %xmm6
+; AVX1-NEXT: vmovdqu 16(%rsi), %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm6
+; AVX1-NEXT: vpor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm7, %xmm6
+; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpackssdw %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpmovsxwd %xmm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR157382:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vmovdqu (%rsi), %ymm1
+; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpcmpgtd %ymm0, %ymm3, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm1, %ymm4, %ymm1
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
+; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR157382:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpcmpnltd %ymm2, %ymm0, %k0
+; AVX512-NEXT: vpcmpltd (%rsi), %ymm2, %k1
+; AVX512-NEXT: vptestmb %xmm1, %xmm1, %k2
+; AVX512-NEXT: korw %k1, %k0, %k0
+; AVX512-NEXT: korw %k2, %k0, %k1
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+ %ld0 = load <8 x i32>, ptr %p0, align 1
+ %ld1 = load <8 x i32>, ptr %p1, align 1
+ %ld2 = load <8 x i8>, ptr %p2, align 1
+ %cmp0 = icmp sge <8 x i32> %ld0, zeroinitializer
+ %cmp1 = icmp sgt <8 x i32> %ld1, zeroinitializer
+ %cmp2 = icmp ne <8 x i8> %ld2, zeroinitializer
+ %cmp01 = or <8 x i1> %cmp0, %cmp1
+ %cmp012 = or <8 x i1> %cmp01, %cmp2
+ %res = select <8 x i1> %cmp012, <8 x i32> %ld0, <8 x i32> zeroinitializer
+ ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/X86/bswap-inline-asm.ll b/llvm/test/CodeGen/X86/bswap-inline-asm.ll
index f8f154c0688f..a9ce616b7ecc 100644
--- a/llvm/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/llvm/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,88 +1,150 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix CHK %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-; CHK-NOT: InlineAsm
+; bswap inline assembly should be preserved as-is.
-; CHECK-LABEL: foo:
-; CHECK: bswapq
define i64 @foo(i64 %x) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapq %rax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
ret i64 %asmtmp
}
-; CHECK-LABEL: bar:
-; CHECK: bswapq
define i64 @bar(i64 %x) nounwind {
+; CHECK-LABEL: bar:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapq %rax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
ret i64 %asmtmp
}
-; CHECK-LABEL: pen:
-; CHECK: bswapl
define i32 @pen(i32 %x) nounwind {
- %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
+; CHECK-LABEL: pen:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
+ %asmtmp = tail call i32 asm "bswapl ${0:k}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
ret i32 %asmtmp
}
-; CHECK-LABEL: s16:
-; CHECK: rolw $8,
define zeroext i16 @s16(i16 zeroext %x) nounwind {
+; CHECK-LABEL: s16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: rorw $8, %di
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: retq
%asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
ret i16 %asmtmp
}
-; CHECK-LABEL: t16:
-; CHECK: rolw $8,
define zeroext i16 @t16(i16 zeroext %x) nounwind {
+; CHECK-LABEL: t16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: rorw $8, %di
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: retq
%asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
ret i16 %asmtmp
}
-; CHECK-LABEL: u16:
-; CHECK: rolw $8,
define zeroext i16 @u16(i16 zeroext %x) nounwind {
+; CHECK-LABEL: u16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: rolw $8, %di
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: retq
%asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
ret i16 %asmtmp
}
-; CHECK-LABEL: v16:
-; CHECK: rolw $8,
define zeroext i16 @v16(i16 zeroext %x) nounwind {
+; CHECK-LABEL: v16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: rolw $8, %di
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: retq
%asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
ret i16 %asmtmp
}
-; CHECK-LABEL: s32:
-; CHECK: bswapl
define i32 @s32(i32 %x) nounwind {
+; CHECK-LABEL: s32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
ret i32 %asmtmp
}
-; CHECK-LABEL: t32:
-; CHECK: bswapl
define i32 @t32(i32 %x) nounwind {
+; CHECK-LABEL: t32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
ret i32 %asmtmp
}
-; CHECK-LABEL: u32:
-; CHECK: bswapl
define i32 @u32(i32 %x) nounwind {
+; CHECK-LABEL: u32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: rorw $8, %ax
+; CHECK-NEXT: rorl $16, %eax
+; CHECK-NEXT: rorw $8, %ax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
ret i32 %asmtmp
}
-; CHECK-LABEL: s64:
-; CHECK: bswapq
define i64 @s64(i64 %x) nounwind {
+; CHECK-LABEL: s64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapq %rax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
ret i64 %asmtmp
}
-; CHECK-LABEL: t64:
-; CHECK: bswapq
define i64 @t64(i64 %x) nounwind {
+; CHECK-LABEL: t64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: bswapq %rax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: retq
%asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind
ret i64 %asmtmp
}
diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll
index 4a9840eac489..66d009cf1221 100644
--- a/llvm/test/CodeGen/X86/call-graph-section.ll
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -11,14 +11,12 @@ declare !type !2 ptr @baz(ptr)
define void @main() {
entry:
- %a = alloca i8, align 1
%fp_foo_val = load ptr, ptr null, align 8
call void (...) %fp_foo_val(), !callee_type !1
%fp_bar_val = load ptr, ptr null, align 8
- %param = trunc i64 0 to i8
- %call_fp_bar = call i32 %fp_bar_val(i8 signext %param), !callee_type !3
+ %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !3
%fp_baz_val = load ptr, ptr null, align 8
- %call_fp_baz = call ptr %fp_baz_val(ptr %a), !callee_type !4
+ %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
ret void
}
diff --git a/llvm/test/CodeGen/X86/combine-gfni.ll b/llvm/test/CodeGen/X86/combine-gfni.ll
new file mode 100644
index 000000000000..b105cdf7ea89
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-gfni.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512
+
+define <16 x i8> @gf2p8affineqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+; SSE-LABEL: gf2p8affineqb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8affineqb $11, %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8affineqb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8affineqb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer
+ %g = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11)
+ %f = freeze <16 x i8> %g
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
+ ret <16 x i8> %r
+}
+
+define <16 x i8> @gf2p8affineinvqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+; SSE-LABEL: gf2p8affineinvqb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8affineinvqb $11, %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8affineinvqb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8affineinvqb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer
+ %g = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11)
+ %f = freeze <16 x i8> %g
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
+ ret <16 x i8> %r
+}
+
+define <16 x i8> @gf2p8mulb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+; SSE-LABEL: gf2p8mulb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8mulb %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8mulb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8mulb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer
+ %g = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %a1, <16 x i8> %a1)
+ %f = freeze <16 x i8> %g
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
+ ret <16 x i8> %r
+}
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
+declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
+declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
+declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
+declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
+declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
new file mode 100644
index 000000000000..2cb060ea92b1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -0,0 +1,400 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX
+
+define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test1_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test1_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test2_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test2_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test3_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_bits:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_bits:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1
+ %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_op:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_op:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52h:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52h:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+
+; Test the fold x * 0 + y -> y
+define <2 x i64> @test_vpmadd52l_mul_zero(<2 x i64> %x0, <2 x i64> %x1) {
+; CHECK-LABEL: test_vpmadd52l_mul_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h_mul_zero(<2 x i64> %x0, <2 x i64> %x1) {
+; CHECK-LABEL: test_vpmadd52h_mul_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_zero_commuted(<2 x i64> %x0, <2 x i64> %x1) {
+; CHECK-LABEL: test_vpmadd52l_mul_zero_commuted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 0, i64 0>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_zero_both(<2 x i64> %x0) {
+; CHECK-LABEL: test_vpmadd52l_mul_zero_both:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_zero_in_52bits(<2 x i64> %x0, <2 x i64> %x1) {
+; CHECK-LABEL: test_vpmadd52l_mul_zero_in_52bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+
+ ; mul by (1 << 52)
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 4503599627370496), <2 x i64> %x1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_add_zero(<2 x i64> %x0, <2 x i64> %x1) {
+; AVX512-LABEL: test_vpmadd52l_add_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_add_zero:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa %xmm2, %xmm0
+; AVX-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> <i64 0, i64 0>, <2 x i64> %x0, <2 x i64> %x1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_zero_scalar(<2 x i64> %x0, <2 x i64> %x1) {
+; AVX512-LABEL: test_vpmadd52l_mul_zero_scalar:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_mul_zero_scalar:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX-NEXT: retq
+
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 123>, <2 x i64> %x1)
+ ret <2 x i64> %1
+}
+
+; (1 << 51) * (1 << 1) -> 1 << 52 -> low 52 bits are zeroes
+define <2 x i64> @test_vpmadd52l_mul_lo52_zero(<2 x i64> %x0) {
+; CHECK-LABEL: test_vpmadd52l_mul_lo52_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2))
+ ret <2 x i64> %1
+}
+
+; (1 << 25) * (1 << 26) = 1 << 51 -> high 52 bits are zeroes
+define <2 x i64> @test_vpmadd52h_mul_hi52_zero(<2 x i64> %x0) {
+; CHECK-LABEL: test_vpmadd52h_mul_hi52_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 33554432), <2 x i64> splat (i64 67108864))
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_lo52_const(<2 x i64> %x0) {
+; AVX512-LABEL: test_vpmadd52l_mul_lo52_const:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_mul_lo52_const:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 123), <2 x i64> splat (i64 456))
+ ret <2 x i64> %1
+}
+
+; (1 << 51) * (1 << 51) -> 1 << 102 -> the high 52 bits are 1 << 50
+define <2 x i64> @test_vpmadd52h_mul_hi52_const(<2 x i64> %x0) {
+; AVX512-LABEL: test_vpmadd52h_mul_hi52_const:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52h_mul_hi52_const:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2251799813685248))
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_lo52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test_vpmadd52l_mul_lo52_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+ %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h_mul_hi52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test_vpmadd52h_mul_hi52_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %and1 = lshr <2 x i64> %x0, splat (i64 40)
+ %and2 = lshr <2 x i64> %x1, splat (i64 40)
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_mul_lo52_mask_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_mul_lo52_mask_negative:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vpmadd52luq %xmm1, %xmm2, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_mul_lo52_mask_negative:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 2097152) ; 1LL << 21
+ %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52h_mul_hi52_negative:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $30, %xmm0, %xmm2
+; AVX512-NEXT: vpsrlq $43, %xmm1, %xmm1
+; AVX512-NEXT: vpmadd52huq %xmm1, %xmm2, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52h_mul_hi52_negative:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq $30, %xmm0, %xmm2
+; AVX-NEXT: vpsrlq $43, %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %and1 = lshr <2 x i64> %x0, splat (i64 30)
+ %and2 = lshr <2 x i64> %x1, splat (i64 43)
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52l:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT: # xmm0 = mem[0,0]
+; CHECK-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 4)
+ %and2 = and <2 x i64> %x1, splat (i64 4)
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+ %ret = and <2 x i64> %madd, splat (i64 1)
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3,3]
+; CHECK-NEXT: # xmm0 = mem[0,0]
+; CHECK-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+ %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2)
+ %ret = and <2 x i64> %madd, splat (i64 3)
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52l:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1234,1234]
+; CHECK-NEXT: # xmm0 = mem[0,0]
+; CHECK-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26
+ %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2)
+ %ret = and <2 x i64> %madd, splat (i64 1234)
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT: # xmm0 = mem[0,0]
+; CHECK-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+ %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+ ; add (1LL << 10) + 1
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2)
+ %ret = and <2 x i64> %madd, splat (i64 1)
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm2, %xmm3
+; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa %xmm2, %xmm3
+; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1
+ %or = or <2 x i64> %x1, splat (i64 1)
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or)
+ %ret = and <2 x i64> %madd, splat (i64 1)
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT: vmovdqa %xmm2, %xmm3
+; AVX512-NEXT: vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT: vmovdqa %xmm2, %xmm3
+; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT: retq
+ %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22
+ %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+ ; add 1; the high 52 bits of the product may be 1, so bit 0 of the sum is unknown
+ %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+ %ret = and <2 x i64> %madd, splat (i64 1)
+ ret <2 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 6f520aa57dcd..805b1b54d5b6 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX,AVX10_2
;
; SSE
diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll
deleted file mode 100644
index 85bb3b3a5487..000000000000
--- a/llvm/test/CodeGen/X86/evex512-mem.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256
-
-define void @test1() {
-; AVX512-LABEL: test1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movq 64, %rax
-; AVX512-NEXT: movq %rax, (%rax)
-; AVX512-NEXT: vmovups 0, %zmm0
-; AVX512-NEXT: vmovups %zmm0, (%rax)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
-;
-; AVX256-LABEL: test1:
-; AVX256: # %bb.0:
-; AVX256-NEXT: movq 64, %rax
-; AVX256-NEXT: movq %rax, (%rax)
-; AVX256-NEXT: vmovups 0, %ymm0
-; AVX256-NEXT: vmovups 32, %ymm1
-; AVX256-NEXT: vmovups %ymm1, (%rax)
-; AVX256-NEXT: vmovups %ymm0, (%rax)
-; AVX256-NEXT: vzeroupper
-; AVX256-NEXT: retq
- call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
diff --git a/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll b/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll
new file mode 100644
index 000000000000..a155d125a6d1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s
+
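+; expand-fp must also run on optnone functions (attribute #0 below); the checks show the i224 sitofp expanded inline into the itofp-* blocks.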
+
+; Function Attrs: noinline optnone
+define double @main(i224 %0) #0 {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0: # %entryitofp-entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $88, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 144
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: orq %r8, %r9
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: orq %r9, %rax
+; CHECK-NEXT: je .LBB0_10
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_1: # %itofp-if-end
+; CHECK-NEXT: movslq %ecx, %rax
+; CHECK-NEXT: movq %rax, %r9
+; CHECK-NEXT: sarq $31, %r9
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: xorq %rax, %rdx
+; CHECK-NEXT: xorq %rax, %rsi
+; CHECK-NEXT: xorq %r9, %rdi
+; CHECK-NEXT: subq %r9, %rdi
+; CHECK-NEXT: sbbq %rax, %rsi
+; CHECK-NEXT: sbbq %rax, %rdx
+; CHECK-NEXT: sbbq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: shldq $32, %rdx, %r8
+; CHECK-NEXT: bsrq %r8, %rax
+; CHECK-NEXT: xorl $63, %eax
+; CHECK-NEXT: movq %rdx, %r10
+; CHECK-NEXT: shldq $32, %rsi, %r10
+; CHECK-NEXT: bsrq %r10, %r11
+; CHECK-NEXT: xorl $63, %r11d
+; CHECK-NEXT: orl $64, %r11d
+; CHECK-NEXT: testq %r8, %r8
+; CHECK-NEXT: cmovnel %eax, %r11d
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: shldq $32, %rdi, %rbx
+; CHECK-NEXT: bsrq %rbx, %r14
+; CHECK-NEXT: xorl $63, %r14d
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shlq $32, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: xorl $63, %eax
+; CHECK-NEXT: orl $64, %eax
+; CHECK-NEXT: testq %rbx, %rbx
+; CHECK-NEXT: cmovnel %r14d, %eax
+; CHECK-NEXT: subl $-128, %eax
+; CHECK-NEXT: orq %r8, %r10
+; CHECK-NEXT: cmovnel %r11d, %eax
+; CHECK-NEXT: movl $224, %r11d
+; CHECK-NEXT: subl %eax, %r11d
+; CHECK-NEXT: movl $223, %r10d
+; CHECK-NEXT: subl %eax, %r10d
+; CHECK-NEXT: cmpl $53, %r11d
+; CHECK-NEXT: jle .LBB0_8
+; CHECK-NEXT: # %bb.2: # %itofp-if-then4
+; CHECK-NEXT: movl %r11d, %r8d
+; CHECK-NEXT: subl $54, %r8d
+; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_3: # %itofp-if-then4
+; CHECK-NEXT: movl %r11d, %r8d
+; CHECK-NEXT: subl $55, %r8d
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: # %bb.11:
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_4: # %itofp-sw-bb
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shldq $1, %rdi, %rax
+; CHECK-NEXT: movq %rdx, %r8
+; CHECK-NEXT: shldq $1, %rsi, %r8
+; CHECK-NEXT: shldq $1, %rdx, %rcx
+; CHECK-NEXT: addq %rdi, %rdi
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: movq %r8, %rdx
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_5: # %itofp-sw-default
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movl %ecx, %r8d
+; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $-87, %r8b
+; CHECK-NEXT: subb %al, %r8b
+; CHECK-NEXT: movb %r8b, %bl
+; CHECK-NEXT: shrb $6, %bl
+; CHECK-NEXT: movzbl %bl, %r12d
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -24(%rsp,%r12,8), %rbx
+; CHECK-NEXT: movq -32(%rsp,%r12,8), %r13
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: movq %r13, %r14
+; CHECK-NEXT: shrdq %cl, %rbx, %r14
+; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq -48(%rsp,%r12,8), %r15
+; CHECK-NEXT: movq -40(%rsp,%r12,8), %r12
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: movq %r12, %r14
+; CHECK-NEXT: shrdq %cl, %r13, %r14
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: shrq %cl, %rbx
+; CHECK-NEXT: movb %r8b, %cl
+; CHECK-NEXT: shrdq %cl, %r12, %r15
+; CHECK-NEXT: addb $55, %al
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rbp, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shrb $3, %cl
+; CHECK-NEXT: andb $24, %cl
+; CHECK-NEXT: negb %cl
+; CHECK-NEXT: movsbq %cl, %rdx
+; CHECK-NEXT: movq -80(%rsp,%rdx), %rsi
+; CHECK-NEXT: movq -72(%rsp,%rdx), %rdi
+; CHECK-NEXT: movq -64(%rsp,%rdx), %r8
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: movq %r8, %r12
+; CHECK-NEXT: shldq %cl, %rdi, %r12
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: shlq %cl, %r13
+; CHECK-NEXT: orq %r12, %r13
+; CHECK-NEXT: movq -56(%rsp,%rdx), %rdx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shldq %cl, %r8, %rdx
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shldq %cl, %rsi, %rdi
+; CHECK-NEXT: orq %rdx, %rdi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: orq %rdi, %r13
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: orq %rax, %r15
+; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: movq %r14, %rsi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT: movq %rbx, %rcx
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .LBB0_6: # %itofp-sw-epilog
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrl $2, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orq %rax, %rdi
+; CHECK-NEXT: addq $1, %rdi
+; CHECK-NEXT: adcq $0, %rsi
+; CHECK-NEXT: adcq $0, %rdx
+; CHECK-NEXT: adcq $0, %rcx
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: shldq $62, %rdi, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: btq $55, %rdi
+; CHECK-NEXT: jae .LBB0_9
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .LBB0_7: # %itofp-if-then20
+; CHECK-NEXT: shldq $61, %rdi, %rsi
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: movl %r11d, %r10d
+; CHECK-NEXT: jmp .LBB0_9
+; CHECK-NEXT: .LBB0_8: # %itofp-if-else
+; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: addb $85, %al
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shrb $3, %cl
+; CHECK-NEXT: andb $24, %cl
+; CHECK-NEXT: negb %cl
+; CHECK-NEXT: movsbq %cl, %rcx
+; CHECK-NEXT: movq 48(%rsp,%rcx), %rdx
+; CHECK-NEXT: movb %al, %cl
+; CHECK-NEXT: shlq %cl, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: .LBB0_9: # %itofp-if-end26
+; CHECK-NEXT: andl $-2147483648, %r9d # imm = 0x80000000
+; CHECK-NEXT: shll $20, %r10d
+; CHECK-NEXT: addl $1072693248, %r10d # imm = 0x3FF00000
+; CHECK-NEXT: andl $1048575, %eax # imm = 0xFFFFF
+; CHECK-NEXT: orl %r9d, %eax
+; CHECK-NEXT: orl %r10d, %eax
+; CHECK-NEXT: movl %eax, %eax
+; CHECK-NEXT: shlq $32, %rax
+; CHECK-NEXT: movabsq $4294967295, %rcx # imm = 0xFFFFFFFF
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: .LBB0_10: # %itofp-return
+; CHECK-NEXT: addq $88, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %x = sitofp i224 %0 to double
+ ret double %x
+}
+
+attributes #0 = { noinline optnone }
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 989aabc9e87b..864c2336f37c 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX10_2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
declare float @llvm.maximum.f32(float, float)
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index eef87b5a9f85..54d82b0c1c92 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX10_2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
declare float @llvm.maximumnum.f32(float, float)
diff --git a/llvm/test/CodeGen/X86/fp16-reload.mir b/llvm/test/CodeGen/X86/fp16-reload.mir
new file mode 100644
index 000000000000..ddbd48cbf3ee
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp16-reload.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown -start-before=twoaddressinstruction -stop-after=postrapseudos -verify-machineinstrs -o - %s | FileCheck %s
+
+...
+---
+name: test
+alignment: 16
+tracksRegLiveness: true
+debugInstrRef: true
+registers:
+liveins:
+ - { reg: '$xmm0', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+ hasCalls: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $xmm0
+
+ ; CHECK-LABEL: name: test
+ ; CHECK: liveins: $xmm0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: MOVSSmr $rsp, 1, $noreg, -4, $noreg, $xmm0 :: (store (s32) into %stack.0, align 2)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+ ; CHECK-NEXT: renamable $xmm0 = MOVSSrm $rsp, 1, $noreg, -4, $noreg :: (load (s32) from %stack.0, align 2)
+ ; CHECK-NEXT: FNOP implicit-def $fpsw, implicit killed renamable $xmm0
+ ; CHECK-NEXT: RET 0
+ %0:fr16 = COPY killed $xmm0
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+ FNOP implicit-def $fpsw, implicit %0:fr16
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/fp16-spill.ll b/llvm/test/CodeGen/X86/fp16-spill.ll
new file mode 100644
index 000000000000..6161009b6f56
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp16-spill.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
+
+define half @test(float %f, ptr %p) nounwind {
+; SSE2-LABEL: test:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: subq $16, %rsp
+; SSE2-NEXT: movq %rdi, %rbx
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE2-NEXT: #APP
+; SSE2-NEXT: #NO_APP
+; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss %xmm0, (%rbx)
+; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: addq $16, %rsp
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: subq $16, %rsp
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX-NEXT: #APP
+; AVX-NEXT: #NO_APP
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss %xmm0, (%rbx)
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: addq $16, %rsp
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT: #APP
+; AVX512-NEXT: #NO_APP
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: vmovss %xmm0, (%rdi)
+; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT: retq
+ %t = fptrunc float %f to half
+ %t2 = fpext half %t to float
+ tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
+ store float %t2, ptr %p
+ ret half %t
+}
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
index c79e19f07cda..77eaaa1ca08d 100644
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -11,244 +11,6 @@ declare i32 @llvm.get.fpmode.i32()
declare void @llvm.set.fpmode.i32(i32 %fpmode)
declare void @llvm.reset.fpmode()
-define void @func_01() nounwind {
-; X86-NOSSE-LABEL: func_01:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: fnstcw (%esp)
-; X86-NOSSE-NEXT: orb $12, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldcw (%esp)
-; X86-NOSSE-NEXT: popl %eax
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE-LABEL: func_01:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: fnstcw (%esp)
-; X86-SSE-NEXT: orb $12, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: fldcw (%esp)
-; X86-SSE-NEXT: stmxcsr (%esp)
-; X86-SSE-NEXT: orb $96, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: ldmxcsr (%esp)
-; X86-SSE-NEXT: popl %eax
-; X86-SSE-NEXT: retl
-;
-; X64-LABEL: func_01:
-; X64: # %bb.0:
-; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: orb $12, -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: orb $96, -{{[0-9]+}}(%rsp)
-; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: retq
- call void @llvm.set.rounding(i32 0) ; TowardZero (CW[11-10] = 11)
- ret void
-}
-
-define void @func_02() nounwind {
-; X86-NOSSE-LABEL: func_02:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: fnstcw (%esp)
-; X86-NOSSE-NEXT: andb $-13, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: fldcw (%esp)
-; X86-NOSSE-NEXT: popl %eax
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE-LABEL: func_02:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: fnstcw (%esp)
-; X86-SSE-NEXT: andb $-13, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: fldcw (%esp)
-; X86-SSE-NEXT: stmxcsr (%esp)
-; X86-SSE-NEXT: andb $-97, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: ldmxcsr (%esp)
-; X86-SSE-NEXT: popl %eax
-; X86-SSE-NEXT: retl
-;
-; X64-LABEL: func_02:
-; X64: # %bb.0:
-; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: andb $-13, -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: andb $-97, -{{[0-9]+}}(%rsp)
-; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: retq
- call void @llvm.set.rounding(i32 1) ; ToNearestTiesToEven (CW[11-10] = 00)
- ret void
-}
-
-define void @func_03() nounwind {
-; X86-NOSSE-LABEL: func_03:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: fnstcw (%esp)
-; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X86-NOSSE-NEXT: andl (%esp), %eax
-; X86-NOSSE-NEXT: orl $2048, %eax # imm = 0x800
-; X86-NOSSE-NEXT: movw %ax, (%esp)
-; X86-NOSSE-NEXT: fldcw (%esp)
-; X86-NOSSE-NEXT: popl %eax
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE-LABEL: func_03:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: fnstcw (%esp)
-; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X86-SSE-NEXT: andl (%esp), %eax
-; X86-SSE-NEXT: orl $2048, %eax # imm = 0x800
-; X86-SSE-NEXT: movw %ax, (%esp)
-; X86-SSE-NEXT: fldcw (%esp)
-; X86-SSE-NEXT: stmxcsr (%esp)
-; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF
-; X86-SSE-NEXT: andl (%esp), %eax
-; X86-SSE-NEXT: orl $16384, %eax # imm = 0x4000
-; X86-SSE-NEXT: movl %eax, (%esp)
-; X86-SSE-NEXT: ldmxcsr (%esp)
-; X86-SSE-NEXT: popl %eax
-; X86-SSE-NEXT: retl
-;
-; X64-LABEL: func_03:
-; X64: # %bb.0:
-; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: orl $2048, %eax # imm = 0x800
-; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: orl $16384, %eax # imm = 0x4000
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: retq
- call void @llvm.set.rounding(i32 2) ; Upward (CW[11-10] = 10)
- ret void
-}
-
-define void @func_04() nounwind {
-; X86-NOSSE-LABEL: func_04:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: fnstcw (%esp)
-; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X86-NOSSE-NEXT: andl (%esp), %eax
-; X86-NOSSE-NEXT: orl $1024, %eax # imm = 0x400
-; X86-NOSSE-NEXT: movw %ax, (%esp)
-; X86-NOSSE-NEXT: fldcw (%esp)
-; X86-NOSSE-NEXT: popl %eax
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE-LABEL: func_04:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: fnstcw (%esp)
-; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X86-SSE-NEXT: andl (%esp), %eax
-; X86-SSE-NEXT: orl $1024, %eax # imm = 0x400
-; X86-SSE-NEXT: movw %ax, (%esp)
-; X86-SSE-NEXT: fldcw (%esp)
-; X86-SSE-NEXT: stmxcsr (%esp)
-; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF
-; X86-SSE-NEXT: andl (%esp), %eax
-; X86-SSE-NEXT: orl $8192, %eax # imm = 0x2000
-; X86-SSE-NEXT: movl %eax, (%esp)
-; X86-SSE-NEXT: ldmxcsr (%esp)
-; X86-SSE-NEXT: popl %eax
-; X86-SSE-NEXT: retl
-;
-; X64-LABEL: func_04:
-; X64: # %bb.0:
-; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: orl $1024, %eax # imm = 0x400
-; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: orl $8192, %eax # imm = 0x2000
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: retq
- call void @llvm.set.rounding(i32 3) ; Downward (CW[11-10] = 01)
- ret void
-}
-
-define void @func_05(i32 %x) nounwind {
-; X86-NOSSE-LABEL: func_05:
-; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: leal 4(%eax,%eax), %ecx
-; X86-NOSSE-NEXT: movl $201, %eax
-; X86-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOSSE-NEXT: shll %cl, %eax
-; X86-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00
-; X86-NOSSE-NEXT: fnstcw (%esp)
-; X86-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
-; X86-NOSSE-NEXT: andl (%esp), %ecx
-; X86-NOSSE-NEXT: orl %eax, %ecx
-; X86-NOSSE-NEXT: movw %cx, (%esp)
-; X86-NOSSE-NEXT: fldcw (%esp)
-; X86-NOSSE-NEXT: popl %eax
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE-LABEL: func_05:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: leal 4(%eax,%eax), %ecx
-; X86-SSE-NEXT: movl $201, %eax
-; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SSE-NEXT: shll %cl, %eax
-; X86-SSE-NEXT: andl $3072, %eax # imm = 0xC00
-; X86-SSE-NEXT: fnstcw (%esp)
-; X86-SSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
-; X86-SSE-NEXT: andl (%esp), %ecx
-; X86-SSE-NEXT: orl %eax, %ecx
-; X86-SSE-NEXT: movw %cx, (%esp)
-; X86-SSE-NEXT: fldcw (%esp)
-; X86-SSE-NEXT: stmxcsr (%esp)
-; X86-SSE-NEXT: movl $-24577, %ecx # imm = 0x9FFF
-; X86-SSE-NEXT: andl (%esp), %ecx
-; X86-SSE-NEXT: leal (%ecx,%eax,8), %eax
-; X86-SSE-NEXT: movl %eax, (%esp)
-; X86-SSE-NEXT: ldmxcsr (%esp)
-; X86-SSE-NEXT: popl %eax
-; X86-SSE-NEXT: retl
-;
-; X64-LABEL: func_05:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal 4(%rdi,%rdi), %ecx
-; X64-NEXT: movl $201, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shll %cl, %eax
-; X64-NEXT: andl $3072, %eax # imm = 0xC00
-; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-3073, %ecx # imm = 0xF3FF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $-24577, %ecx # imm = 0x9FFF
-; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: leal (%rcx,%rax,8), %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
-; X64-NEXT: retq
- call void @llvm.set.rounding(i32 %x) ; Downward
- ret void
-}
-
define void @get_fpenv_01(ptr %ptr) #0 {
; X86-NOSSE-LABEL: get_fpenv_01:
; X86-NOSSE: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll
index 3196f8177cc9..38e3e23f7caa 100644
--- a/llvm/test/CodeGen/X86/freeze.ll
+++ b/llvm/test/CodeGen/X86/freeze.ll
@@ -141,3 +141,48 @@ entry:
%z = urem i32 %y, 10
ret i32 %z
}
+
+; Make sure we don't crash when replacing all uses of N with an existing freeze N.
+
+define i64 @pr155345(ptr %p1, i1 %cond, ptr %p2, ptr %p3) {
+; X86ASM-LABEL: pr155345:
+; X86ASM: # %bb.0: # %entry
+; X86ASM-NEXT: movzbl (%rdi), %edi
+; X86ASM-NEXT: xorl %eax, %eax
+; X86ASM-NEXT: orb $1, %dil
+; X86ASM-NEXT: movb %dil, (%rdx)
+; X86ASM-NEXT: movzbl %dil, %edx
+; X86ASM-NEXT: cmovel %edx, %eax
+; X86ASM-NEXT: sete %dil
+; X86ASM-NEXT: testb $1, %sil
+; X86ASM-NEXT: cmovnel %edx, %eax
+; X86ASM-NEXT: movb %dl, (%rcx)
+; X86ASM-NEXT: movl $1, %edx
+; X86ASM-NEXT: movl %eax, %ecx
+; X86ASM-NEXT: shlq %cl, %rdx
+; X86ASM-NEXT: orb %sil, %dil
+; X86ASM-NEXT: movzbl %dil, %eax
+; X86ASM-NEXT: andl %edx, %eax
+; X86ASM-NEXT: andl $1, %eax
+; X86ASM-NEXT: retq
+entry:
+ %load1 = load i8, ptr %p1, align 1
+ %v1 = or i8 %load1, 1
+ %v2 = zext i8 %v1 to i32
+ store i8 %v1, ptr %p2, align 1
+ %v3 = load i8, ptr %p2, align 1
+ %ext1 = sext i8 %v3 to i64
+ %ext2 = zext i32 %v2 to i64
+ %cmp1 = icmp ult i64 0, %ext1
+ %v4 = select i1 %cond, i1 false, i1 %cmp1
+ %sel1 = select i1 %v4, i64 0, i64 %ext2
+ %shl = shl i64 1, %sel1
+ store i8 %v1, ptr %p3, align 1
+ %v5 = load i8, ptr %p3, align 1
+ %ext3 = sext i8 %v5 to i64
+ %cmp2 = icmp ult i64 0, %ext3
+ %v6 = select i1 %cond, i1 false, i1 %cmp2
+ %sel2 = select i1 %v6, i64 0, i64 1
+ %and = and i64 %sel2, %shl
+ ret i64 %and
+}
diff --git a/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll b/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll
new file mode 100644
index 000000000000..aebfc7d483d6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll
@@ -0,0 +1,580 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avxifma | FileCheck %s --check-prefixes=X64,AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma | FileCheck %s --check-prefixes=X64,AVX512,AVX512-NOVL
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=X64,AVX512,AVX512VL
+
+; 67108863 == (1 << 26) - 1
+; 4503599627370496 == (1 << 52)
+; 4503599627370495 == (1 << 52) - 1
+
+define <8 x i64> @test_512_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; AVX-LABEL: test_512_combine:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX-NEXT: vpand %ymm6, %ymm0, %ymm0
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm0, %ymm4
+; AVX-NEXT: vpand %ymm6, %ymm3, %ymm0
+; AVX-NEXT: vpand %ymm6, %ymm1, %ymm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm1, %ymm5
+; AVX-NEXT: vmovdqa %ymm4, %ymm0
+; AVX-NEXT: vmovdqa %ymm5, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_512_combine:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
+; AVX512-NEXT: vpandq %zmm3, %zmm0, %zmm0
+; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x_masked = and <8 x i64> %x, splat (i64 67108863)
+ %y_masked = and <8 x i64> %y, splat (i64 67108863)
+ %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
+ %res = add nuw nsw <8 x i64> %mul, %z
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_512_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; AVX-LABEL: test_512_combine_v2:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [3,3,3,3]
+; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm7 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623]
+; AVX-NEXT: vpand %ymm7, %ymm0, %ymm0
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm0, %ymm4
+; AVX-NEXT: vpand %ymm6, %ymm3, %ymm0
+; AVX-NEXT: vpand %ymm7, %ymm1, %ymm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm1, %ymm5
+; AVX-NEXT: vmovdqa %ymm4, %ymm0
+; AVX-NEXT: vmovdqa %ymm5, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_512_combine_v2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
+; AVX512-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x_masked = and <8 x i64> %x, splat (i64 1125899906842623) ; (1 << 50) - 1
+ %y_masked = and <8 x i64> %y, splat (i64 3)
+ %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
+ %res = add nuw nsw <8 x i64> %mul, %z
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_512_no_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; AVX-LABEL: test_512_no_combine:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495]
+; AVX-NEXT: vpand %ymm6, %ymm0, %ymm7
+; AVX-NEXT: vpand %ymm6, %ymm1, %ymm8
+; AVX-NEXT: vpand %ymm6, %ymm2, %ymm9
+; AVX-NEXT: vpand %ymm6, %ymm3, %ymm6
+; AVX-NEXT: vpsrlq $32, %ymm8, %ymm8
+; AVX-NEXT: vpmuludq %ymm3, %ymm8, %ymm8
+; AVX-NEXT: vpsrlq $32, %ymm6, %ymm6
+; AVX-NEXT: vpmuludq %ymm6, %ymm1, %ymm6
+; AVX-NEXT: vpaddq %ymm6, %ymm8, %ymm6
+; AVX-NEXT: vpsllq $32, %ymm6, %ymm6
+; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vpsrlq $32, %ymm7, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
+; AVX-NEXT: vpsrlq $32, %ymm9, %ymm7
+; AVX-NEXT: vpmuludq %ymm7, %ymm0, %ymm7
+; AVX-NEXT: vpaddq %ymm3, %ymm7, %ymm3
+; AVX-NEXT: vpsllq $32, %ymm3, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm4, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm5, %ymm1, %ymm1
+; AVX-NEXT: vpaddq %ymm6, %ymm1, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_512_no_combine:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495]
+; AVX512-NEXT: vpandq %zmm3, %zmm0, %zmm4
+; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm3
+; AVX512-NEXT: vpsrlq $32, %zmm4, %zmm4
+; AVX512-NEXT: vpmuludq %zmm1, %zmm4, %zmm4
+; AVX512-NEXT: vpsrlq $32, %zmm3, %zmm3
+; AVX512-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512-NEXT: vpaddq %zmm4, %zmm3, %zmm3
+; AVX512-NEXT: vpsllq $32, %zmm3, %zmm3
+; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %x_masked = and <8 x i64> %x, splat (i64 4503599627370495)
+ %y_masked = and <8 x i64> %y, splat (i64 4503599627370495)
+ %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
+ %res = add nuw nsw <8 x i64> %mul, %z
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_512_no_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; AVX-LABEL: test_512_no_combine_v2:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq $32, %ymm1, %ymm6
+; AVX-NEXT: vpmuludq %ymm3, %ymm6, %ymm6
+; AVX-NEXT: vpsrlq $32, %ymm3, %ymm7
+; AVX-NEXT: vpmuludq %ymm7, %ymm1, %ymm7
+; AVX-NEXT: vpaddq %ymm6, %ymm7, %ymm6
+; AVX-NEXT: vpsllq $32, %ymm6, %ymm6
+; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vpsrlq $32, %ymm0, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
+; AVX-NEXT: vpsrlq $32, %ymm2, %ymm7
+; AVX-NEXT: vpmuludq %ymm7, %ymm0, %ymm7
+; AVX-NEXT: vpaddq %ymm3, %ymm7, %ymm3
+; AVX-NEXT: vpsllq $32, %ymm3, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm4, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm5, %ymm1, %ymm1
+; AVX-NEXT: vpaddq %ymm6, %ymm1, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_512_no_combine_v2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3
+; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3
+; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm4
+; AVX512-NEXT: vpmuludq %zmm4, %zmm0, %zmm4
+; AVX512-NEXT: vpaddq %zmm3, %zmm4, %zmm3
+; AVX512-NEXT: vpsllq $32, %zmm3, %zmm3
+; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %mul = mul <8 x i64> %x, %y
+ %res = add <8 x i64> %mul, %z
+ ret <8 x i64> %res
+}
+
+define <4 x i64> @test_256_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
+; AVX-LABEL: test_256_combine:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVX-NEXT: vmovdqa %ymm2, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-NOVL-LABEL: test_256_combine:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX512-NOVL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512-NOVL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; AVX512-NOVL-NEXT: retq
+;
+; AVX512VL-LABEL: test_256_combine:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+ %x_masked = and <4 x i64> %x, splat(i64 67108863)
+ %y_masked = and <4 x i64> %y, splat(i64 67108863)
+ %mul = mul nuw nsw <4 x i64> %x_masked, %y_masked
+ %res = add nuw nsw <4 x i64> %z, %mul
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_256_no_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
+; X64-LABEL: test_256_no_combine:
+; X64: # %bb.0:
+; X64-NEXT: vpsrlq $32, %ymm0, %ymm3
+; X64-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
+; X64-NEXT: vpsrlq $32, %ymm1, %ymm4
+; X64-NEXT: vpmuludq %ymm4, %ymm0, %ymm4
+; X64-NEXT: vpaddq %ymm3, %ymm4, %ymm3
+; X64-NEXT: vpsllq $32, %ymm3, %ymm3
+; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; X64-NEXT: retq
+ %mul = mul <4 x i64> %x, %y
+ %res = add <4 x i64> %mul, %z
+ ret <4 x i64> %res
+}
+
+define <2 x i64> @test_128_combine(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; AVX-LABEL: test_128_combine:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa %xmm2, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-NOVL-LABEL: test_128_combine:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX512-NOVL-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX512-NOVL-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512-NOVL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
+; AVX512-NOVL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; AVX512-NOVL-NEXT: retq
+;
+; AVX512VL-LABEL: test_128_combine:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+ %x_masked = and <2 x i64> %x, splat (i64 67108863)
+ %y_masked = and <2 x i64> %y, splat (i64 67108863)
+ %mul = mul <2 x i64> %x_masked, %y_masked
+ %res = add <2 x i64> %z, %mul
+ ret <2 x i64> %res
+}
+
+; Sanity check: the vpmadd52 combine must not be applied to <1 x i64>, which lowers to scalar imul/lea.
+define <1 x i64> @test_scalar_no_ifma(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
+; X64-LABEL: test_scalar_no_ifma:
+; X64: # %bb.0:
+; X64-NEXT: imulq %rsi, %rdi
+; X64-NEXT: leaq (%rdi,%rdx), %rax
+; X64-NEXT: retq
+ %mul = mul <1 x i64> %x, %y
+ %res = add <1 x i64> %mul, %z
+ ret <1 x i64> %res
+}
+
+; 40-bit and 13-bit operands: the product can need 53 bits, which is too wide for the 52-bit multiply, so no combine.
+define <8 x i64> @test_mixed_width_too_wide(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
+; AVX-LABEL: test_mixed_width_too_wide:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [8191,8191,8191,8191]
+; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX-NEXT: vpand %ymm6, %ymm3, %ymm3
+; AVX-NEXT: vpmovzxdq {{.*#+}} ymm6 = [2155905028,2155905036,2155905044,2155905052]
+; AVX-NEXT: vpshufb %ymm6, %ymm1, %ymm7
+; AVX-NEXT: vpmuludq %ymm3, %ymm7, %ymm7
+; AVX-NEXT: vpsllq $32, %ymm7, %ymm7
+; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vpshufb %ymm6, %ymm0, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
+; AVX-NEXT: vpsllq $32, %ymm3, %ymm3
+; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm0, %ymm4, %ymm0
+; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm1, %ymm5, %ymm1
+; AVX-NEXT: vpaddq %ymm7, %ymm1, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_mixed_width_too_wide:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
+; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm3
+; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsllq $32, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm1
+; AVX512-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x40 = and <8 x i64> %x, splat (i64 1099511627775)
+ %y13 = and <8 x i64> %y, splat (i64 8191)
+ %mul = mul <8 x i64> %x40, %y13
+ %res = add <8 x i64> %z, %mul
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_zext32_inputs_not_safe(<8 x i32> %xi32, <8 x i32> %yi32, <8 x i64> %z) {
+; AVX-LABEL: test_zext32_inputs_not_safe:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} ymm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: vpmovzxdq {{.*#+}} ymm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX-NEXT: vpmuludq %ymm5, %ymm4, %ymm4
+; AVX-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1
+; AVX-NEXT: vpaddq %ymm4, %ymm2, %ymm0
+; AVX-NEXT: vpaddq %ymm1, %ymm3, %ymm1
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_zext32_inputs_not_safe:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
+; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x = zext <8 x i32> %xi32 to <8 x i64>
+ %y = zext <8 x i32> %yi32 to <8 x i64>
+ %mul = mul <8 x i64> %x, %y
+ %res = add <8 x i64> %z, %mul
+ ret <8 x i64> %res
+}
+
+define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i64> %z) nounwind {
+; AVX-LABEL: test_1024_combine_split:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rbp
+; AVX-NEXT: movq %rsp, %rbp
+; AVX-NEXT: andq $-32, %rsp
+; AVX-NEXT: subq $32, %rsp
+; AVX-NEXT: vmovdqa 112(%rbp), %ymm8
+; AVX-NEXT: vmovdqa 80(%rbp), %ymm9
+; AVX-NEXT: vmovdqa 48(%rbp), %ymm10
+; AVX-NEXT: vmovdqa 16(%rbp), %ymm11
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm12 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm3, %ymm12, %ymm3
+; AVX-NEXT: vpand %ymm2, %ymm12, %ymm2
+; AVX-NEXT: vpand %ymm1, %ymm12, %ymm1
+; AVX-NEXT: vpand %ymm0, %ymm12, %ymm0
+; AVX-NEXT: vpand %ymm7, %ymm12, %ymm7
+; AVX-NEXT: {vex} vpmadd52luq %ymm7, %ymm3, %ymm8
+; AVX-NEXT: vpand %ymm6, %ymm12, %ymm3
+; AVX-NEXT: {vex} vpmadd52luq %ymm3, %ymm2, %ymm9
+; AVX-NEXT: vpand %ymm5, %ymm12, %ymm2
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm10
+; AVX-NEXT: vpand %ymm4, %ymm12, %ymm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm11
+; AVX-NEXT: vmovdqa %ymm11, %ymm0
+; AVX-NEXT: vmovdqa %ymm10, %ymm1
+; AVX-NEXT: vmovdqa %ymm9, %ymm2
+; AVX-NEXT: vmovdqa %ymm8, %ymm3
+; AVX-NEXT: movq %rbp, %rsp
+; AVX-NEXT: popq %rbp
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_1024_combine_split:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm6 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
+; AVX512-NEXT: vpandq %zmm6, %zmm2, %zmm2
+; AVX512-NEXT: vpandq %zmm6, %zmm0, %zmm0
+; AVX512-NEXT: vpmadd52luq %zmm2, %zmm0, %zmm4
+; AVX512-NEXT: vpandq %zmm6, %zmm3, %zmm0
+; AVX512-NEXT: vpandq %zmm6, %zmm1, %zmm1
+; AVX512-NEXT: vpmadd52luq %zmm0, %zmm1, %zmm5
+; AVX512-NEXT: vmovdqa64 %zmm4, %zmm0
+; AVX512-NEXT: vmovdqa64 %zmm5, %zmm1
+; AVX512-NEXT: retq
+ %x_masked = and <16 x i64> %x, splat (i64 67108863)
+ %y_masked = and <16 x i64> %y, splat (i64 67108863)
+ %mul = mul <16 x i64> %x_masked, %y_masked
+ %res = add <16 x i64> %z, %mul
+ ret <16 x i64> %res
+}
+
+define <1 x i64> @test_not_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
+; X64-LABEL: test_not_v1i64:
+; X64: # %bb.0:
+; X64-NEXT: andl $67108863, %edi # imm = 0x3FFFFFF
+; X64-NEXT: imulq %rdi, %rdi
+; X64-NEXT: leaq (%rdi,%rdx), %rax
+; X64-NEXT: retq
+ %x_masked = and <1 x i64> %x, splat (i64 67108863)
+ %y_masked = and <1 x i64> %x, splat (i64 67108863)
+ %mul = mul <1 x i64> %x_masked, %y_masked
+ %res = add <1 x i64> %mul, %z
+ ret <1 x i64> %res
+}
+
+define <3 x i64> @test_v3i64(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
+; AVX-LABEL: test_v3i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-NOVL-LABEL: test_v3i64:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX512-NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: retq
+;
+; AVX512VL-LABEL: test_v3i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+ %x_masked = and <3 x i64> %x, splat (i64 67108863)
+ %y_masked = and <3 x i64> %x, splat (i64 67108863)
+ %mul = mul <3 x i64> %x_masked, %y_masked
+ %res = add <3 x i64> %mul, %z
+ ret <3 x i64> %res
+}
+
+define <5 x i64> @test_v5i64(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
+; AVX-LABEL: test_v5i64:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm2
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: vmovq %r9, %xmm4
+; AVX-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpsrlq $32, %xmm3, %xmm4
+; AVX-NEXT: vpmuludq %xmm4, %xmm3, %xmm4
+; AVX-NEXT: vpsllq $33, %xmm4, %xmm4
+; AVX-NEXT: vpmuludq %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1
+; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm2
+; AVX-NEXT: vmovdqa %ymm2, (%rdi)
+; AVX-NEXT: vmovq %xmm1, 32(%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v5i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %x_masked = and <5 x i64> %x, splat (i64 67108863)
+ %y_masked = and <5 x i64> %x, splat (i64 67108863)
+ %mul = mul <5 x i64> %x_masked, %y_masked
+ %res = add <5 x i64> %mul, %z
+ ret <5 x i64> %res
+}
+
+define <6 x i64> @test_v6i64(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
+; AVX-LABEL: test_v6i64:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm1
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm1
+; AVX-NEXT: vmovq %r9, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpmuldq %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, 32(%rdi)
+; AVX-NEXT: vmovdqa %ymm1, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v6i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %x_masked = and <6 x i64> %x, splat (i64 67108863)
+ %y_masked = and <6 x i64> %x, splat (i64 67108863)
+ %mul = mul <6 x i64> %x_masked, %y_masked
+ %res = add <6 x i64> %mul, %z
+ ret <6 x i64> %res
+}
+
+define <9 x i64> @test_v9i64(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
+; AVX-LABEL: test_v9i64:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovq %r9, %xmm1
+; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm3
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm4
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm5 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm5, %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm5, %ymm1, %ymm1
+; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT: vmovq %rcx, %xmm5
+; AVX-NEXT: vmovq {{.*#+}} xmm6 = mem[0],zero
+; AVX-NEXT: vpand %xmm5, %xmm6, %xmm5
+; AVX-NEXT: vpsrlq $32, %xmm5, %xmm6
+; AVX-NEXT: vpmuludq %xmm6, %xmm5, %xmm6
+; AVX-NEXT: vpsllq $33, %xmm6, %xmm6
+; AVX-NEXT: vpmuludq %xmm5, %xmm5, %xmm5
+; AVX-NEXT: vpaddq %xmm2, %xmm5, %xmm2
+; AVX-NEXT: vpaddq %xmm6, %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm4
+; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm1, %ymm3
+; AVX-NEXT: vmovdqa %ymm3, 32(%rdi)
+; AVX-NEXT: vmovdqa %ymm4, (%rdi)
+; AVX-NEXT: vmovq %xmm2, 64(%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v9i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq %rdi, %rax
+; AVX512-NEXT: vmovq %r8, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-NEXT: vmovq %rdx, %xmm1
+; AVX512-NEXT: vmovq %rsi, %xmm2
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vmovq %r9, %xmm1
+; AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm2
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX512-NEXT: vmovq %rcx, %xmm3
+; AVX512-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX512-NEXT: vpsrlq $32, %xmm3, %xmm4
+; AVX512-NEXT: vpmuludq %xmm4, %xmm3, %xmm4
+; AVX512-NEXT: vpsllq $33, %xmm4, %xmm4
+; AVX512-NEXT: vpmuludq %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpaddq %xmm1, %xmm3, %xmm1
+; AVX512-NEXT: vpaddq %xmm4, %xmm1, %xmm1
+; AVX512-NEXT: vpmadd52luq %zmm0, %zmm0, %zmm2
+; AVX512-NEXT: vmovq %xmm1, 64(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x_masked = and <9 x i64> %x, splat (i64 67108863)
+ %y_masked = and <9 x i64> %x, splat (i64 67108863)
+ %mul = mul <9 x i64> %x_masked, %y_masked
+ %res = add <9 x i64> %mul, %z
+ ret <9 x i64> %res
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 57dccfc1b4a8..0538541a6f7b 100644
--- a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -18,9 +18,9 @@ define i64 @t(ptr %arg) nounwind {
ret i64 0
}
-; Make sure that we translate this to the bswap intrinsic which lowers down without the
-; inline assembly.
-; CHECK-NOT: #APP
+; Make sure this lowers to inline assembly and is not translated to an
+; intrinsic.
+; CHECK: #APP
define i32 @s(i32 %argc, ptr nocapture %argv) unnamed_addr nounwind {
entry:
%0 = trunc i32 %argc to i16
diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index 3ac0fd7746a3..eccb32346a40 100644
--- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -22,41 +22,45 @@ define void @FontChange(i1 %foo) nounwind {
; CHECK-LABEL: FontChange:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je .LBB0_10
+; CHECK-NEXT: je .LBB0_12
+; CHECK-NEXT: # %bb.1: # %bb298
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.2: # %bb304
+; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_1: # %bb366
+; CHECK-NEXT: .LBB0_3: # %bb366
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.2: # %bb428
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: .LBB0_4: # %bb428
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je .LBB0_10
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: je .LBB0_12
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: cmpb $0, 0
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_4: # %bb650
+; CHECK-NEXT: .LBB0_6: # %bb650
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: je .LBB0_4
-; CHECK-NEXT: # %bb.5: # %bb662
+; CHECK-NEXT: je .LBB0_6
+; CHECK-NEXT: # %bb.7: # %bb662
; CHECK-NEXT: movl 0, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $57344, %ecx # imm = 0xE000
; CHECK-NEXT: cmpl $8192, %ecx # imm = 0x2000
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: # %bb.6: # %bb4884
+; CHECK-NEXT: jne .LBB0_12
+; CHECK-NEXT: # %bb.8: # %bb4884
; CHECK-NEXT: andl $7168, %eax # imm = 0x1C00
; CHECK-NEXT: cmpl $1024, %eax # imm = 0x400
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: # %bb.7: # %bb4932
+; CHECK-NEXT: jne .LBB0_12
+; CHECK-NEXT: # %bb.9: # %bb4932
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: # %bb.8: # %bb4940
+; CHECK-NEXT: jne .LBB0_12
+; CHECK-NEXT: # %bb.10: # %bb4940
; CHECK-NEXT: movl 0, %eax
; CHECK-NEXT: cmpl $160, %eax
-; CHECK-NEXT: je .LBB0_10
-; CHECK-NEXT: # %bb.9: # %bb4940
+; CHECK-NEXT: je .LBB0_12
+; CHECK-NEXT: # %bb.11: # %bb4940
; CHECK-NEXT: cmpl $159, %eax
-; CHECK-NEXT: .LBB0_10: # %bb4897
+; CHECK-NEXT: .LBB0_12: # %bb4897
; CHECK-NEXT: retq
entry:
br i1 %foo, label %bb298, label %bb49
diff --git a/llvm/test/CodeGen/X86/isel-ceil.ll b/llvm/test/CodeGen/X86/isel-ceil.ll
new file mode 100644
index 000000000000..c82cfebd4814
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-ceil.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+
+define float @ceil_f32(float %a) nounwind readnone {
+; DAG-X64-LABEL: ceil_f32:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp ceilf@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: ceil_f32:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq ceilf@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: ceil_f32:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll ceilf
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: ceil_f32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp ceilf@PLT # TAILCALL
+ %c = call float @llvm.ceil.f32(float %a)
+ ret float %c
+}
+
+define double @ceil_f64(double %a) nounwind readnone {
+; DAG-X64-LABEL: ceil_f64:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp ceil@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: ceil_f64:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq ceil@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: ceil_f64:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: calll ceil
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: ceil_f64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp ceil@PLT # TAILCALL
+ %c = call double @llvm.ceil.f64(double %a)
+ ret double %c
+}
+
+define x86_fp80 @ceil_f80(x86_fp80 %a) nounwind readnone {
+; X64-LABEL: ceil_f80:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: callq ceill@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+;
+; X86-LABEL: ceil_f80:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll ceill
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: ceil_f80:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: subq $24, %rsp
+; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: fstpt (%rsp)
+; GISEL-X64-NEXT: callq ceill@PLT
+; GISEL-X64-NEXT: addq $24, %rsp
+; GISEL-X64-NEXT: retq
+ %c = call x86_fp80 @llvm.ceil.f80(x86_fp80 %a)
+ ret x86_fp80 %c
+}
+
diff --git a/llvm/test/CodeGen/X86/isel-floor.ll b/llvm/test/CodeGen/X86/isel-floor.ll
new file mode 100644
index 000000000000..675925b61126
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-floor.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+
+define float @floor_f32(float %a) nounwind readnone {
+; DAG-X64-LABEL: floor_f32:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp floorf@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: floor_f32:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq floorf@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: floor_f32:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll floorf
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: floor_f32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp floorf@PLT # TAILCALL
+ %c = call float @llvm.floor.f32(float %a)
+ ret float %c
+}
+
+define double @floor_f64(double %a) nounwind readnone {
+; DAG-X64-LABEL: floor_f64:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp floor@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: floor_f64:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq floor@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: floor_f64:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: calll floor
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: floor_f64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp floor@PLT # TAILCALL
+ %c = call double @llvm.floor.f64(double %a)
+ ret double %c
+}
+
+define x86_fp80 @floor_f80(x86_fp80 %a) nounwind readnone {
+; X64-LABEL: floor_f80:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: callq floorl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+;
+; X86-LABEL: floor_f80:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll floorl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: floor_f80:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: subq $24, %rsp
+; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: fstpt (%rsp)
+; GISEL-X64-NEXT: callq floorl@PLT
+; GISEL-X64-NEXT: addq $24, %rsp
+; GISEL-X64-NEXT: retq
+ %c = call x86_fp80 @llvm.floor.f80(x86_fp80 %a)
+ ret x86_fp80 %c
+}
+
diff --git a/llvm/test/CodeGen/X86/isel-ftrunc.ll b/llvm/test/CodeGen/X86/isel-ftrunc.ll
new file mode 100644
index 000000000000..9bf06193961a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-ftrunc.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+
+define float @trunc_f32(float %a) nounwind readnone {
+; DAG-X64-LABEL: trunc_f32:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp truncf@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: trunc_f32:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq truncf@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: trunc_f32:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll truncf
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: trunc_f32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp truncf@PLT # TAILCALL
+ %c = call float @llvm.trunc.f32(float %a)
+ ret float %c
+}
+
+define double @trunc_f64(double %a) nounwind readnone {
+; DAG-X64-LABEL: trunc_f64:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: jmp trunc@PLT # TAILCALL
+;
+; FASTISEL-X64-LABEL: trunc_f64:
+; FASTISEL-X64: # %bb.0:
+; FASTISEL-X64-NEXT: pushq %rax
+; FASTISEL-X64-NEXT: callq trunc@PLT
+; FASTISEL-X64-NEXT: popq %rax
+; FASTISEL-X64-NEXT: retq
+;
+; X86-LABEL: trunc_f64:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: calll trunc
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: trunc_f64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: jmp trunc@PLT # TAILCALL
+ %c = call double @llvm.trunc.f64(double %a)
+ ret double %c
+}
+
+define x86_fp80 @trunc_f80(x86_fp80 %a) nounwind readnone {
+; X64-LABEL: trunc_f80:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: callq truncl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+;
+; X86-LABEL: trunc_f80:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll truncl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; GISEL-X64-LABEL: trunc_f80:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: subq $24, %rsp
+; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: fstpt (%rsp)
+; GISEL-X64-NEXT: callq truncl@PLT
+; GISEL-X64-NEXT: addq $24, %rsp
+; GISEL-X64-NEXT: retq
+ %c = call x86_fp80 @llvm.trunc.f80(x86_fp80 %a)
+ ret x86_fp80 %c
+}
+
diff --git a/llvm/test/CodeGen/X86/llvm.acos.ll b/llvm/test/CodeGen/X86/isel-llvm.acos.ll
index 9176cf47bda7..9176cf47bda7 100644
--- a/llvm/test/CodeGen/X86/llvm.acos.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.acos.ll
diff --git a/llvm/test/CodeGen/X86/llvm.asin.ll b/llvm/test/CodeGen/X86/isel-llvm.asin.ll
index 87ffcc9c963c..87ffcc9c963c 100644
--- a/llvm/test/CodeGen/X86/llvm.asin.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.asin.ll
diff --git a/llvm/test/CodeGen/X86/llvm.atan.ll b/llvm/test/CodeGen/X86/isel-llvm.atan.ll
index c03361d18c1d..c03361d18c1d 100644
--- a/llvm/test/CodeGen/X86/llvm.atan.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.atan.ll
diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/isel-llvm.atan2.ll
index aa56068e1778..aa56068e1778 100644
--- a/llvm/test/CodeGen/X86/llvm.atan2.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.atan2.ll
diff --git a/llvm/test/CodeGen/X86/llvm.cos.ll b/llvm/test/CodeGen/X86/isel-llvm.cos.ll
index af039854d349..af039854d349 100644
--- a/llvm/test/CodeGen/X86/llvm.cos.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.cos.ll
diff --git a/llvm/test/CodeGen/X86/llvm.cosh.ll b/llvm/test/CodeGen/X86/isel-llvm.cosh.ll
index a61867c11fd4..a61867c11fd4 100644
--- a/llvm/test/CodeGen/X86/llvm.cosh.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.cosh.ll
diff --git a/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll b/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll
new file mode 100644
index 000000000000..688add1e92ab
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,SDAG-X86-NOSSE
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,FASTISEL-X86-NOSSE
+; RUN: llc < %s -mtriple=i686-- -global-isel -global-isel-abort=2 -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,GISEL-X86-NOSSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s --check-prefixes=X64-NOSSE,SDAG-X64-NOSSE
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 -mattr=-sse | FileCheck %s --check-prefixes=X64-NOSSE,FASTISEL-X64-NOSSE
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=2 -mattr=-sse | FileCheck %s --check-prefixes=X64-NOSSE,GISEL-X64-NOSSE
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86,SDAG-X86
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86,GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,SDAG-X64
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64,GISEL-X64
+
+declare void @llvm.set.rounding(i32 %x)
+
+define void @func_01() nounwind {
+; X86-NOSSE-LABEL: func_01:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: orb $12, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X64-NOSSE-LABEL: func_01:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: orb $12, -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: retq
+;
+; X86-LABEL: func_01:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: fnstcw (%esp)
+; X86-NEXT: orb $12, {{[0-9]+}}(%esp)
+; X86-NEXT: fldcw (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: func_01:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: orb $12, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: orb $96, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 0) ; TowardZero (CW[11-10] = 11)
+ ret void
+}
+
+define void @func_02() nounwind {
+; X86-NOSSE-LABEL: func_02:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: andb $-13, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X64-NOSSE-LABEL: func_02:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: andb $-13, -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: retq
+;
+; X86-LABEL: func_02:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: fnstcw (%esp)
+; X86-NEXT: andb $-13, {{[0-9]+}}(%esp)
+; X86-NEXT: fldcw (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: func_02:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: andb $-13, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: andb $-97, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 1) ; ToNearestTiesToEven (CW[11-10] = 00)
+ ret void
+}
+
+define void @func_03() nounwind {
+; X86-NOSSE-LABEL: func_03:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %eax
+; X86-NOSSE-NEXT: orl $2048, %eax # imm = 0x800
+; X86-NOSSE-NEXT: movw %ax, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X64-NOSSE-LABEL: func_03:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NOSSE-NEXT: orl $2048, %eax # imm = 0x800
+; X64-NOSSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: retq
+;
+; X86-LABEL: func_03:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: fnstcw (%esp)
+; X86-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NEXT: andl (%esp), %eax
+; X86-NEXT: orl $2048, %eax # imm = 0x800
+; X86-NEXT: movw %ax, (%esp)
+; X86-NEXT: fldcw (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: func_03:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $2048, %eax # imm = 0x800
+; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $16384, %eax # imm = 0x4000
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 2) ; Upward (CW[11-10] = 10)
+ ret void
+}
+
+define void @func_04() nounwind {
+; X86-NOSSE-LABEL: func_04:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %eax
+; X86-NOSSE-NEXT: orl $1024, %eax # imm = 0x400
+; X86-NOSSE-NEXT: movw %ax, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X64-NOSSE-LABEL: func_04:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NOSSE-NEXT: orl $1024, %eax # imm = 0x400
+; X64-NOSSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: retq
+;
+; X86-LABEL: func_04:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: fnstcw (%esp)
+; X86-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NEXT: andl (%esp), %eax
+; X86-NEXT: orl $1024, %eax # imm = 0x400
+; X86-NEXT: movw %ax, (%esp)
+; X86-NEXT: fldcw (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: func_04:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $1024, %eax # imm = 0x400
+; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $8192, %eax # imm = 0x2000
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 3) ; Downward (CW[11-10] = 01)
+ ret void
+}
+
+define void @func_05(i32 %x) nounwind {
+; X86-NOSSE-LABEL: func_05:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: leal 4(%eax,%eax), %ecx
+; X86-NOSSE-NEXT: movl $201, %eax
+; X86-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOSSE-NEXT: shll %cl, %eax
+; X86-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %ecx
+; X86-NOSSE-NEXT: orl %eax, %ecx
+; X86-NOSSE-NEXT: movw %cx, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X64-NOSSE-LABEL: func_05:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOSSE-NEXT: leal 4(%rdi,%rdi), %ecx
+; X64-NOSSE-NEXT: movl $201, %eax
+; X64-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOSSE-NEXT: shll %cl, %eax
+; X64-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00
+; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
+; X64-NOSSE-NEXT: orl %eax, %ecx
+; X64-NOSSE-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NOSSE-NEXT: retq
+;
+; X86-LABEL: func_05:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 4(%eax,%eax), %ecx
+; X86-NEXT: movl $201, %eax
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: andl $3072, %eax # imm = 0xC00
+; X86-NEXT: fnstcw (%esp)
+; X86-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X86-NEXT: andl (%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movw %cx, (%esp)
+; X86-NEXT: fldcw (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: func_05:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 4(%rdi,%rdi), %ecx
+; X64-NEXT: movl $201, %eax
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: andl $3072, %eax # imm = 0xC00
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %ecx # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: leal (%rcx,%rax,8), %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 %x) ; Dynamic: rounding mode is the run-time value of %x, not a fixed mode
+ ret void
+}
+
+attributes #0 = { nounwind "use-soft-float"="true" } ; NOTE(review): no function in this file references #0 - looks like a leftover; confirm before removing
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FASTISEL-X64: {{.*}}
+; FASTISEL-X64-NOSSE: {{.*}}
+; FASTISEL-X86: {{.*}}
+; FASTISEL-X86-NOSSE: {{.*}}
+; GISEL-X64: {{.*}}
+; GISEL-X64-NOSSE: {{.*}}
+; GISEL-X86: {{.*}}
+; GISEL-X86-NOSSE: {{.*}}
+; SDAG-X64: {{.*}}
+; SDAG-X64-NOSSE: {{.*}}
+; SDAG-X86: {{.*}}
+; SDAG-X86-NOSSE: {{.*}}
diff --git a/llvm/test/CodeGen/X86/llvm.sin.ll b/llvm/test/CodeGen/X86/isel-llvm.sin.ll
index 0f17f83d0102..0f17f83d0102 100644
--- a/llvm/test/CodeGen/X86/llvm.sin.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.sin.ll
diff --git a/llvm/test/CodeGen/X86/llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll
index 065710f91457..065710f91457 100644
--- a/llvm/test/CodeGen/X86/llvm.sincos.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll
diff --git a/llvm/test/CodeGen/X86/llvm.sinh.ll b/llvm/test/CodeGen/X86/isel-llvm.sinh.ll
index ef30f8de0695..ef30f8de0695 100644
--- a/llvm/test/CodeGen/X86/llvm.sinh.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.sinh.ll
diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/isel-llvm.tan.ll
index 4e76653cd129..4e76653cd129 100644
--- a/llvm/test/CodeGen/X86/llvm.tan.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.tan.ll
diff --git a/llvm/test/CodeGen/X86/llvm.tanh.ll b/llvm/test/CodeGen/X86/isel-llvm.tanh.ll
index c4f6e2f179cf..c4f6e2f179cf 100644
--- a/llvm/test/CodeGen/X86/llvm.tanh.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.tanh.ll
diff --git a/llvm/test/CodeGen/X86/kmov.ll b/llvm/test/CodeGen/X86/kmov.ll
index cab810d30cd7..8b1e69a97d54 100644
--- a/llvm/test/CodeGen/X86/kmov.ll
+++ b/llvm/test/CodeGen/X86/kmov.ll
@@ -143,6 +143,57 @@ define <8 x i1> @invert_i8_mask_extract_8(i8 %mask) {
ret <8 x i1> %cmp.45
}
+define <8 x i1> @i8_mask_extract_7(i8 %mask) {
+; X64-AVX512-LABEL: i8_mask_extract_7:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: shrb %dil
+; X64-AVX512-NEXT: movzbl %dil, %eax
+; X64-AVX512-NEXT: kmovd %eax, %k0
+; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
+; X64-AVX512-NEXT: retq
+;
+; X64-KNL-LABEL: i8_mask_extract_7:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vmovd %edi, %xmm0
+; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
+; X64-KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,4,8,16,32,64,128,0,2,4,8,16,32,64,128,0]
+; X64-KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X64-KNL-NEXT: retq
+ %.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
+ %.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
+ %cmp.45 = icmp ne <8 x i8> %1, zeroinitializer
+ ret <8 x i1> %cmp.45
+}
+
+define <8 x i1> @invert_i8_mask_extract_7(i8 %mask) {
+; X64-AVX512-LABEL: invert_i8_mask_extract_7:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: shrb %dil
+; X64-AVX512-NEXT: movzbl %dil, %eax
+; X64-AVX512-NEXT: kmovd %eax, %k0
+; X64-AVX512-NEXT: knotb %k0, %k0
+; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
+; X64-AVX512-NEXT: retq
+;
+; X64-KNL-LABEL: invert_i8_mask_extract_7:
+; X64-KNL: # %bb.0:
+; X64-KNL-NEXT: vmovd %edi, %xmm0
+; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
+; X64-KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X64-KNL-NEXT: retq
+ %.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
+ %.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
+ %cmp.45 = icmp eq <8 x i8> %1, zeroinitializer
+ ret <8 x i1> %cmp.45
+}
+
define <4 x i1> @i16_mask_extract_4(i16 %mask) {
; X64-AVX512-LABEL: i16_mask_extract_4:
; X64-AVX512: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 7bcf57311853..5f38645f7463 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -7,47 +7,15 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
-define i64 @testmsxh(half %x) nounwind {
-; X86-NOSSE-LABEL: testmsxh:
-; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: pushl %eax
-; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: calll __extendhfsf2
-; X86-NOSSE-NEXT: fstps (%esp)
-; X86-NOSSE-NEXT: calll llrintf
-; X86-NOSSE-NEXT: popl %ecx
-; X86-NOSSE-NEXT: retl
-;
-; X86-SSE2-LABEL: testmsxh:
-; X86-SSE2: # %bb.0: # %entry
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT: movw %ax, (%esp)
-; X86-SSE2-NEXT: calll __extendhfsf2
-; X86-SSE2-NEXT: fstps (%esp)
-; X86-SSE2-NEXT: calll llrintf
-; X86-SSE2-NEXT: popl %ecx
-; X86-SSE2-NEXT: retl
-;
-; X64-SSE-LABEL: testmsxh:
-; X64-SSE: # %bb.0: # %entry
-; X64-SSE-NEXT: pushq %rax
-; X64-SSE-NEXT: callq __extendhfsf2@PLT
-; X64-SSE-NEXT: callq rintf@PLT
-; X64-SSE-NEXT: callq __truncsfhf2@PLT
-; X64-SSE-NEXT: callq __extendhfsf2@PLT
-; X64-SSE-NEXT: cvttss2si %xmm0, %rax
-; X64-SSE-NEXT: popq %rcx
-; X64-SSE-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
- ret i64 %0
-}
+; FIXME: crash
+; define i64 @test_llrint_i64_f16(half %x) nounwind {
+; entry:
+; %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
+; ret i64 %0
+; }
-define i64 @testmsxs(float %x) nounwind {
-; X86-NOSSE-LABEL: testmsxs:
+define i64 @test_llrint_i64_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_llrint_i64_f32:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
@@ -61,7 +29,7 @@ define i64 @testmsxs(float %x) nounwind {
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmsxs:
+; X86-SSE2-LABEL: test_llrint_i64_f32:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
@@ -77,7 +45,7 @@ define i64 @testmsxs(float %x) nounwind {
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmsxs:
+; X86-AVX-LABEL: test_llrint_i64_f32:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
@@ -93,12 +61,12 @@ define i64 @testmsxs(float %x) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: testmsxs:
+; X64-SSE-LABEL: test_llrint_i64_f32:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtss2si %xmm0, %rax
; X64-SSE-NEXT: retq
;
-; X64-AVX-LABEL: testmsxs:
+; X64-AVX-LABEL: test_llrint_i64_f32:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
; X64-AVX-NEXT: retq
@@ -107,8 +75,8 @@ entry:
ret i64 %0
}
-define i64 @testmsxd(double %x) nounwind {
-; X86-NOSSE-LABEL: testmsxd:
+define i64 @test_llrint_i64_f64(double %x) nounwind {
+; X86-NOSSE-LABEL: test_llrint_i64_f64:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
@@ -122,7 +90,7 @@ define i64 @testmsxd(double %x) nounwind {
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmsxd:
+; X86-SSE2-LABEL: test_llrint_i64_f64:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
@@ -138,7 +106,7 @@ define i64 @testmsxd(double %x) nounwind {
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmsxd:
+; X86-AVX-LABEL: test_llrint_i64_f64:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
@@ -154,12 +122,12 @@ define i64 @testmsxd(double %x) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: testmsxd:
+; X64-SSE-LABEL: test_llrint_i64_f64:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cvtsd2si %xmm0, %rax
; X64-SSE-NEXT: retq
;
-; X64-AVX-LABEL: testmsxd:
+; X64-AVX-LABEL: test_llrint_i64_f64:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax
; X64-AVX-NEXT: retq
@@ -168,8 +136,8 @@ entry:
ret i64 %0
}
-define i64 @testmsll(x86_fp80 %x) nounwind {
-; X86-LABEL: testmsll:
+define i64 @test_llrint_i64_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_llrint_i64_f80:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
@@ -183,7 +151,7 @@ define i64 @testmsll(x86_fp80 %x) nounwind {
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
-; X64-LABEL: testmsll:
+; X64-LABEL: test_llrint_i64_f80:
; X64: # %bb.0: # %entry
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
@@ -195,8 +163,8 @@ entry:
}
; FIXME(#44744): incorrect libcall
-define i64 @testmslq(fp128 %x) nounwind {
-; X86-NOSSE-LABEL: testmslq:
+define i64 @test_llrint_i64_f128(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: test_llrint_i64_f128:
; X86-NOSSE: # %bb.0: # %entry
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
@@ -212,7 +180,7 @@ define i64 @testmslq(fp128 %x) nounwind {
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmslq:
+; X86-SSE2-LABEL: test_llrint_i64_f128:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
@@ -228,7 +196,7 @@ define i64 @testmslq(fp128 %x) nounwind {
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmslq:
+; X86-AVX-LABEL: test_llrint_i64_f128:
; X86-AVX: # %bb.0: # %entry
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
@@ -241,11 +209,181 @@ define i64 @testmslq(fp128 %x) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
-; X64-LABEL: testmslq:
+; X64-LABEL: test_llrint_i64_f128:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp llrintl@PLT # TAILCALL
entry:
- %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x)
+ %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
+ ret i64 %0
+}
+
+; FIXME: crash
+; define i64 @test_llrint_i64_f16_strict(half %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
+
+define i64 @test_llrint_i64_f32_strict(float %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_llrint_i64_f32_strict:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: wait
+; X86-NOSSE-NEXT: calll llrintf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_llrint_i64_f32_strict:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llrintf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_llrint_i64_f32_strict:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %eax
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX-NEXT: calll llrintf
+; X86-AVX-NEXT: popl %ecx
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_llrint_i64_f32_strict:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq llrintf@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %0
+}
+
+define i64 @test_llrint_i64_f64_strict(double %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_llrint_i64_f64_strict:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: wait
+; X86-NOSSE-NEXT: calll llrint
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_llrint_i64_f64_strict:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llrint
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_llrint_i64_f64_strict:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: calll llrint
+; X86-AVX-NEXT: addl $8, %esp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_llrint_i64_f64_strict:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq llrint@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %0
+}
+
+define i64 @test_llrint_i64_f80_strict(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: test_llrint_i64_f80_strict:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_llrint_i64_f80_strict:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq llrintl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %0
+}
+
+; FIXME(#44744): incorrect libcall
+define i64 @test_llrint_i64_f128_strict(fp128 %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_llrint_i64_f128_strict:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: pushl 20(%ebp)
+; X86-NOSSE-NEXT: pushl 16(%ebp)
+; X86-NOSSE-NEXT: pushl 12(%ebp)
+; X86-NOSSE-NEXT: pushl 8(%ebp)
+; X86-NOSSE-NEXT: calll llrintl
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_llrint_i64_f128_strict:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll llrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_llrint_i64_f128_strict:
+; X86-AVX: # %bb.0: # %entry
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll llrintl
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_llrint_i64_f128_strict:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq llrintl@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
ret i64 %0
}
diff --git a/llvm/test/CodeGen/X86/llround-conv.ll b/llvm/test/CodeGen/X86/llround-conv.ll
index 19a980b72809..ef4df82e9e57 100644
--- a/llvm/test/CodeGen/X86/llround-conv.ll
+++ b/llvm/test/CodeGen/X86/llround-conv.ll
@@ -1,88 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
-define i64 @testmsxs(float %x) {
-; X86-LABEL: testmsxs:
-; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fstps (%esp)
-; X86-NEXT: calll llroundf
-; X86-NEXT: popl %ecx
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; FIXME: crash
+; define i64 @test_llround_f16(half %x) nounwind {
+; %conv = tail call i64 @llvm.llround.f16(half %x)
+; ret i64 %conv
+; }
+
+define i64 @test_llround_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_llround_f32:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: calll llroundf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
;
-; SSE2-LABEL: testmsxs:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushl %eax
-; SSE2-NEXT: .cfi_def_cfa_offset 8
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss %xmm0, (%esp)
-; SSE2-NEXT: calll llroundf
-; SSE2-NEXT: popl %ecx
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
+; X86-SSE2-LABEL: test_llround_f32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llroundf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
;
-; GISEL-X86-LABEL: testmsxs:
-; GISEL-X86: # %bb.0: # %entry
+; X64-LABEL: test_llround_f32:
+; X64: # %bb.0:
+; X64-NEXT: jmp llroundf@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_llround_f32:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl %eax, (%esp)
; GISEL-X86-NEXT: calll llroundf
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmsxs:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llroundf@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsxs:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_llround_f32:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 16
; GISEL-X64-NEXT: callq llroundf
; GISEL-X64-NEXT: popq %rcx
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.llround.f32(float %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.llround.f32(float %x)
+ ret i64 %conv
}
-define i64 @testmsxd(double %x) {
-; X86-LABEL: testmsxd:
-; X86: # %bb.0: # %entry
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NEXT: fstpl (%esp)
-; X86-NEXT: calll llround
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+define i64 @test_llround_f64(double %x) nounwind {
+; X86-NOSSE-LABEL: test_llround_f64:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: calll llround
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
;
-; SSE2-LABEL: testmsxd:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: subl $8, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 12
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movsd %xmm0, (%esp)
-; SSE2-NEXT: calll llround
-; SSE2-NEXT: addl $8, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
+; X86-SSE2-LABEL: test_llround_f64:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llround
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
;
-; GISEL-X86-LABEL: testmsxd:
-; GISEL-X86: # %bb.0: # %entry
+; X64-LABEL: test_llround_f64:
+; X64: # %bb.0:
+; X64-NEXT: jmp llround@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_llround_f64:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL-X86-NEXT: movl 4(%eax), %eax
@@ -92,111 +88,140 @@ define i64 @testmsxd(double %x) {
; GISEL-X86-NEXT: movl %eax, 4(%edx)
; GISEL-X86-NEXT: calll llround
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmsxd:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llround@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsxd:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_llround_f64:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 16
; GISEL-X64-NEXT: callq llround
; GISEL-X64-NEXT: popq %rcx
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.llround.f64(double %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.llround.f64(double %x)
+ ret i64 %conv
}
-define i64 @testmsll(x86_fp80 %x) {
-; X86-LABEL: testmsll:
-; X86: # %bb.0: # %entry
+define i64 @test_llround_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_llround_f80:
+; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
-; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll llroundl
; X86-NEXT: addl $12, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
-; SSE2-LABEL: testmsll:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: subl $12, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE2-NEXT: fstpt (%esp)
-; SSE2-NEXT: calll llroundl
-; SSE2-NEXT: addl $12, %esp
-; SSE2-NEXT: .cfi_def_cfa_offset 4
-; SSE2-NEXT: retl
+; X64-LABEL: test_llround_f80:
+; X64: # %bb.0:
+; X64-NEXT: jmp llroundl@PLT # TAILCALL
;
-; GISEL-X86-LABEL: testmsll:
-; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-LABEL: test_llround_f80:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp)
; GISEL-X86-NEXT: fstpt (%esp)
; GISEL-X86-NEXT: calll llroundl
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmsll:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp llroundl@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsll:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_llround_f80:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: subq $24, %rsp
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 32
; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
; GISEL-X64-NEXT: fstpt (%rsp)
; GISEL-X64-NEXT: callq llroundl
; GISEL-X64-NEXT: addq $24, %rsp
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.llround.f80(x86_fp80 %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.llround.f80(x86_fp80 %x)
+ ret i64 %conv
}
-define i64 @test_llround_i64_f32(float %x) nounwind {
-; X86-LABEL: test_llround_i64_f32:
+; FIXME(#44744): incorrect libcall
+define i64 @test_llround_f128(fp128 %x) nounwind {
+; X86-LABEL: test_llround_f128:
; X86: # %bb.0:
-; X86-NEXT: pushl %eax
-; X86-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NEXT: fstps (%esp)
-; X86-NEXT: calll llroundf
-; X86-NEXT: popl %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll llroundl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
-; SSE2-LABEL: test_llround_i64_f32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pushl %eax
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss %xmm0, (%esp)
-; SSE2-NEXT: calll llroundf
-; SSE2-NEXT: popl %ecx
-; SSE2-NEXT: retl
+; X64-LABEL: test_llround_f128:
+; X64: # %bb.0:
+; X64-NEXT: jmp llroundl@PLT # TAILCALL
;
-; GISEL-X86-LABEL: test_llround_i64_f32:
+; GISEL-X86-LABEL: test_llround_f128:
; GISEL-X86: # %bb.0:
-; GISEL-X86-NEXT: subl $12, %esp
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: subl $24, %esp
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; GISEL-X86-NEXT: movl %eax, (%esp)
-; GISEL-X86-NEXT: calll llroundf
-; GISEL-X86-NEXT: addl $12, %esp
+; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: calll llroundf128
+; GISEL-X86-NEXT: addl $24, %esp
+; GISEL-X86-NEXT: popl %esi
; GISEL-X86-NEXT: retl
;
+; GISEL-X64-LABEL: test_llround_f128:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: pushq %rax
+; GISEL-X64-NEXT: callq llroundf128
+; GISEL-X64-NEXT: popq %rcx
+; GISEL-X64-NEXT: retq
+ %conv = tail call i64 @llvm.llround.f128(fp128 %x)
+ ret i64 %conv
+}
+
+; FIXME: crash
+; define i64 @test_llround_i64_f16(half %x) nounwind {
+; %conv = call i64 @llvm.llround.i64.f16(half %x)
+; ret i64 %conv
+; }
+
+define i64 @test_llround_i64_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_llround_i64_f32:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: calll llroundf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_llround_i64_f32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llroundf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
; X64-LABEL: test_llround_i64_f32:
; X64: # %bb.0:
; X64-NEXT: jmp llroundf@PLT # TAILCALL
;
+; GISEL-X86-LABEL: test_llround_i64_f32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: subl $12, %esp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl %eax, (%esp)
+; GISEL-X86-NEXT: calll llroundf
+; GISEL-X86-NEXT: addl $12, %esp
+; GISEL-X86-NEXT: retl
+;
; GISEL-X64-LABEL: test_llround_i64_f32:
; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
@@ -208,23 +233,27 @@ define i64 @test_llround_i64_f32(float %x) nounwind {
}
define i64 @test_llround_i64_f64(double %x) nounwind {
-; X86-LABEL: test_llround_i64_f64:
-; X86: # %bb.0:
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NEXT: fstpl (%esp)
-; X86-NEXT: calll llround
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: retl
+; X86-NOSSE-LABEL: test_llround_i64_f64:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: calll llround
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_llround_i64_f64:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll llround
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
;
-; SSE2-LABEL: test_llround_i64_f64:
-; SSE2: # %bb.0:
-; SSE2-NEXT: subl $8, %esp
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movsd %xmm0, (%esp)
-; SSE2-NEXT: calll llround
-; SSE2-NEXT: addl $8, %esp
-; SSE2-NEXT: retl
+; X64-LABEL: test_llround_i64_f64:
+; X64: # %bb.0:
+; X64-NEXT: jmp llround@PLT # TAILCALL
;
; GISEL-X86-LABEL: test_llround_i64_f64:
; GISEL-X86: # %bb.0:
@@ -240,10 +269,6 @@ define i64 @test_llround_i64_f64(double %x) nounwind {
; GISEL-X86-NEXT: addl $12, %esp
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: test_llround_i64_f64:
-; X64: # %bb.0:
-; X64-NEXT: jmp llround@PLT # TAILCALL
-;
; GISEL-X64-LABEL: test_llround_i64_f64:
; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
@@ -264,14 +289,9 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind {
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
-; SSE2-LABEL: test_llround_i64_f80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: subl $12, %esp
-; SSE2-NEXT: fldt {{[0-9]+}}(%esp)
-; SSE2-NEXT: fstpt (%esp)
-; SSE2-NEXT: calll llroundl
-; SSE2-NEXT: addl $12, %esp
-; SSE2-NEXT: retl
+; X64-LABEL: test_llround_i64_f80:
+; X64: # %bb.0:
+; X64-NEXT: jmp llroundl@PLT # TAILCALL
;
; GISEL-X86-LABEL: test_llround_i64_f80:
; GISEL-X86: # %bb.0:
@@ -282,10 +302,6 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind {
; GISEL-X86-NEXT: addl $12, %esp
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: test_llround_i64_f80:
-; X64: # %bb.0:
-; X64-NEXT: jmp llroundl@PLT # TAILCALL
-;
; GISEL-X64-LABEL: test_llround_i64_f80:
; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: subq $24, %rsp
@@ -297,3 +313,79 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind {
%conv = call i64 @llvm.llround.i64.f80(x86_fp80 %x)
ret i64 %conv
}
+
+; FIXME(#44744): incorrect libcall
+define i64 @test_llround_i64_f128(fp128 %x) nounwind {
+; X86-LABEL: test_llround_i64_f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll llroundl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_llround_i64_f128:
+; X64: # %bb.0:
+; X64-NEXT: jmp llroundl@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_llround_i64_f128:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: subl $24, %esp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl %eax, (%esp)
+; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: calll llroundf128
+; GISEL-X86-NEXT: addl $24, %esp
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: retl
+;
+; GISEL-X64-LABEL: test_llround_i64_f128:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: pushq %rax
+; GISEL-X64-NEXT: callq llroundf128
+; GISEL-X64-NEXT: popq %rcx
+; GISEL-X64-NEXT: retq
+ %conv = call i64 @llvm.llround.i64.f128(fp128 %x)
+ ret i64 %conv
+}
+
+; FIXME: not yet implemented for global isel
+; define i64 @test_llround_i64_f16_strict(half %x) nounwind strictfp {
+; %conv = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
+
+; define i64 @test_llround_i64_f32_strict(float %x) nounwind strictfp {
+; %conv = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
+
+; define i64 @test_llround_i64_f64_strict(double %x) nounwind strictfp {
+; %conv = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
+
+; define i64 @test_llround_i64_f80_strict(x86_fp80 %x) nounwind strictfp {
+; %conv = call i64 @llvm.experimental.constrained.llround.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
+
+; ; FIXME(#44744): incorrect libcall
+; define i64 @test_llround_i64_f128_strict(fp128 %x) nounwind strictfp {
+; %conv = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 3c50aea1095f..2b99b4c50f58 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -8,15 +8,15 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
; FIXME: crash
-; define i32 @testmswh(half %x) nounwind {
+; define i32 @test_lrint_i32_f16(half %x) nounwind {
; entry:
; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
; ret i32 %0
; }
-define i32 @testmsws(float %x) nounwind {
-; X86-NOSSE-LABEL: testmsws:
-; X86-NOSSE: # %bb.0: # %entry
+define i32 @test_lrint_i32_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i32_f32:
+; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
@@ -24,33 +24,32 @@ define i32 @testmsws(float %x) nounwind {
; X86-NOSSE-NEXT: popl %ecx
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmsws:
-; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-LABEL: test_lrint_i32_f32:
+; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmsws:
-; X86-AVX: # %bb.0: # %entry
+; X86-AVX-LABEL: test_lrint_i32_f32:
+; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: testmsws:
-; X64-SSE: # %bb.0: # %entry
+; X64-SSE-LABEL: test_lrint_i32_f32:
+; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtss2si %xmm0, %eax
; X64-SSE-NEXT: retq
;
-; X64-AVX-LABEL: testmsws:
-; X64-AVX: # %bb.0: # %entry
+; X64-AVX-LABEL: test_lrint_i32_f32:
+; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtss2si %xmm0, %eax
; X64-AVX-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lrint.i32.f32(float %x)
+ ret i32 %conv
}
-define i32 @testmswd(double %x) nounwind {
-; X86-NOSSE-LABEL: testmswd:
-; X86-NOSSE: # %bb.0: # %entry
+define i32 @test_lrint_i32_f64(double %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i32_f64:
+; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %eax
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpl (%esp)
@@ -58,33 +57,32 @@ define i32 @testmswd(double %x) nounwind {
; X86-NOSSE-NEXT: popl %ecx
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmswd:
-; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-LABEL: test_lrint_i32_f64:
+; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmswd:
-; X86-AVX: # %bb.0: # %entry
+; X86-AVX-LABEL: test_lrint_i32_f64:
+; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: retl
;
-; X64-SSE-LABEL: testmswd:
-; X64-SSE: # %bb.0: # %entry
+; X64-SSE-LABEL: test_lrint_i32_f64:
+; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsd2si %xmm0, %eax
; X64-SSE-NEXT: retq
;
-; X64-AVX-LABEL: testmswd:
-; X64-AVX: # %bb.0: # %entry
+; X64-AVX-LABEL: test_lrint_i32_f64:
+; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax
; X64-AVX-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lrint.i32.f64(double %x)
+ ret i32 %conv
}
-define i32 @testmsll(x86_fp80 %x) nounwind {
-; X86-LABEL: testmsll:
-; X86: # %bb.0: # %entry
+define i32 @test_lrint_i32_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_lrint_i32_f80:
+; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fistpl (%esp)
@@ -92,21 +90,20 @@ define i32 @testmsll(x86_fp80 %x) nounwind {
; X86-NEXT: popl %ecx
; X86-NEXT: retl
;
-; X64-LABEL: testmsll:
-; X64: # %bb.0: # %entry
+; X64-LABEL: test_lrint_i32_f80:
+; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x)
+ ret i32 %conv
}
; FIXME(#44744): incorrect libcall
-define i32 @testmswq(fp128 %x) nounwind {
-; X86-NOSSE-LABEL: testmswq:
-; X86-NOSSE: # %bb.0: # %entry
+define i32 @test_lrint_i32_f128(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i32_f128:
+; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: andl $-16, %esp
@@ -121,8 +118,8 @@ define i32 @testmswq(fp128 %x) nounwind {
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
-; X86-SSE2-LABEL: testmswq:
-; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-LABEL: test_lrint_i32_f128:
+; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
@@ -137,8 +134,8 @@ define i32 @testmswq(fp128 %x) nounwind {
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
-; X86-AVX-LABEL: testmswq:
-; X86-AVX: # %bb.0: # %entry
+; X86-AVX-LABEL: test_lrint_i32_f128:
+; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: andl $-16, %esp
@@ -150,12 +147,176 @@ define i32 @testmswq(fp128 %x) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
-; X64-LABEL: testmswq:
-; X64: # %bb.0: # %entry
+; X64-LABEL: test_lrint_i32_f128:
+; X64: # %bb.0:
; X64-NEXT: jmp lrintl@PLT # TAILCALL
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+ ret i32 %conv
+}
+
+; FIXME: crash
+; define i32 @test_lrint_i32_f16_strict(half %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+
+define i32 @test_lrint_i32_f32_strict(float %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_lrint_i32_f32_strict:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: wait
+; X86-NOSSE-NEXT: calll lrintf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i32_f32_strict:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lrintf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_lrint_i32_f32_strict:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %eax
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintf
+; X86-AVX-NEXT: popl %ecx
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_lrint_i32_f32_strict:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq lrintf@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i32 %conv
+}
+
+define i32 @test_lrint_i32_f64_strict(double %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_lrint_i32_f64_strict:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: wait
+; X86-NOSSE-NEXT: calll lrint
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i32_f64_strict:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lrint
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_lrint_i32_f64_strict:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrint
+; X86-AVX-NEXT: addl $8, %esp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_lrint_i32_f64_strict:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq lrint@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i32 %conv
+}
+
+define i32 @test_lrint_i32_f80_strict(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: test_lrint_i32_f80_strict:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll lrintl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_lrint_i32_f80_strict:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq lrintl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+ %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i32 %conv
+}
+
+; FIXME(#44744): incorrect libcall
+define i32 @test_lrint_i32_f128_strict(fp128 %x) nounwind strictfp {
+; X86-NOSSE-LABEL: test_lrint_i32_f128_strict:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: pushl 20(%ebp)
+; X86-NOSSE-NEXT: pushl 16(%ebp)
+; X86-NOSSE-NEXT: pushl 12(%ebp)
+; X86-NOSSE-NEXT: pushl 8(%ebp)
+; X86-NOSSE-NEXT: calll lrintl
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i32_f128_strict:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: pushl 20(%ebp)
+; X86-SSE2-NEXT: pushl 16(%ebp)
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: calll lrintl
+; X86-SSE2-NEXT: addl $16, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; X86-AVX-LABEL: test_lrint_i32_f128_strict:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: pushl %ebp
+; X86-AVX-NEXT: movl %esp, %ebp
+; X86-AVX-NEXT: andl $-16, %esp
+; X86-AVX-NEXT: subl $32, %esp
+; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT: vmovups %xmm0, (%esp)
+; X86-AVX-NEXT: calll lrintl
+; X86-AVX-NEXT: movl %ebp, %esp
+; X86-AVX-NEXT: popl %ebp
+; X86-AVX-NEXT: retl
+;
+; X64-LABEL: test_lrint_i32_f128_strict:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq lrintl@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i32 %conv
}
declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 2ba1500df0b6..731c03bf0d74 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -1,92 +1,311 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
-define i64 @testmsxh(half %x) nounwind {
-; SSE-LABEL: testmsxh:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: callq rintf@PLT
-; SSE-NEXT: callq __truncsfhf2@PLT
-; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: cvttss2si %xmm0, %rax
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
- ret i64 %0
-}
+; FIXME: crash
+; define i64 @test_lrint_i64_f16(half %x) nounwind {
+; %conv = tail call i64 @llvm.lrint.i64.f16(half %x)
+; ret i64 %conv
+; }
-define i64 @testmsxs(float %x) nounwind {
-; SSE-LABEL: testmsxs:
-; SSE: # %bb.0: # %entry
+define i64 @test_lrint_i64_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i64_f32:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: flds 8(%ebp)
+; X86-NOSSE-NEXT: fistpll (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i64_f32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: flds (%esp)
+; X86-SSE2-NEXT: fistpll (%esp)
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; SSE-LABEL: test_lrint_i64_f32:
+; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX-LABEL: testmsxs:
-; AVX: # %bb.0: # %entry
+; AVX-LABEL: test_lrint_i64_f32:
+; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.lrint.i64.f32(float %x)
+ ret i64 %conv
}
-define i64 @testmsxd(double %x) nounwind {
-; SSE-LABEL: testmsxd:
-; SSE: # %bb.0: # %entry
+define i64 @test_lrint_i64_f64(double %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i64_f64:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl 8(%ebp)
+; X86-NOSSE-NEXT: fistpll (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i64_f64:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: fldl (%esp)
+; X86-SSE2-NEXT: fistpll (%esp)
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
+; SSE-LABEL: test_lrint_i64_f64:
+; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX-LABEL: testmsxd:
-; AVX: # %bb.0: # %entry
+; AVX-LABEL: test_lrint_i64_f64:
+; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2si %xmm0, %rax
; AVX-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.lrint.i64.f64(double %x)
+ ret i64 %conv
}
-define i64 @testmsll(x86_fp80 %x) nounwind {
-; CHECK-LABEL: testmsll:
-; CHECK: # %bb.0: # %entry
+define i64 @test_lrint_i64_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_lrint_i64_f80:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: fistpll (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f80:
+; CHECK: # %bb.0:
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fistpll -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
+ ret i64 %conv
}
; FIXME(#44744): incorrect libcall
-define i64 @testmsxq(fp128 %x) nounwind {
-; CHECK-LABEL: testmsxq:
-; CHECK: # %bb.0: # %entry
+define i64 @test_lrint_i64_f128(fp128 %x) nounwind {
+; X86-LABEL: test_lrint_i64_f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll lrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f128:
+; CHECK: # %bb.0:
; CHECK-NEXT: jmp lrintl@PLT # TAILCALL
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
- ret i64 %0
+ %conv = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+ ret i64 %conv
+}
+
+; FIXME: crash
+; define i64 @test_lrint_i64_f16_strict(half %x) nounwind {
+; %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %conv
+; }
+
+define i64 @test_lrint_i64_f32_strict(float %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i64_f32_strict:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: calll lrintf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i64_f32_strict:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lrintf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f32_strict:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq lrintf@PLT
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %conv
+}
+
+define i64 @test_lrint_i64_f64_strict(double %x) nounwind {
+; X86-NOSSE-LABEL: test_lrint_i64_f64_strict:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: calll lrint
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lrint_i64_f64_strict:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lrint
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f64_strict:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq lrint@PLT
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %conv
+}
+
+define i64 @test_lrint_i64_f80_strict(x86_fp80 %x) nounwind {
+; X86-LABEL: test_lrint_i64_f80_strict:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll lrintl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f80_strict:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq lrintl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %conv
+}
+
+; FIXME(#44744): incorrect libcall
+define i64 @test_lrint_i64_f128_strict(fp128 %x) nounwind {
+; X86-LABEL: test_lrint_i64_f128_strict:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll lrintl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; CHECK-LABEL: test_lrint_i64_f128_strict:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq lrintl@PLT
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+ ret i64 %conv
}
define i32 @PR125324(float %x) nounwind {
+; X86-NOSSE-LABEL: PR125324:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
+; X86-NOSSE-NEXT: andl $-8, %esp
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: flds 8(%ebp)
+; X86-NOSSE-NEXT: fistpll (%esp)
+; X86-NOSSE-NEXT: movl (%esp), %eax
+; X86-NOSSE-NEXT: movl %ebp, %esp
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: PR125324:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: flds (%esp)
+; X86-SSE2-NEXT: fistpll (%esp)
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: retl
+;
; SSE-LABEL: PR125324:
-; SSE: # %bb.0: # %entry
+; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: # kill: def $eax killed $eax killed $rax
; SSE-NEXT: retq
;
; AVX-LABEL: PR125324:
-; AVX: # %bb.0: # %entry
+; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: # kill: def $eax killed $eax killed $rax
; AVX-NEXT: retq
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
- %1 = trunc i64 %0 to i32
- ret i32 %1
+ %conv = tail call i64 @llvm.lrint.i64.f32(float %x)
+ %trunc = trunc i64 %conv to i32
+ ret i32 %trunc
}
declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lround-conv-i32.ll b/llvm/test/CodeGen/X86/lround-conv-i32.ll
index c37536623143..389f29233dcc 100644
--- a/llvm/test/CodeGen/X86/lround-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lround-conv-i32.ll
@@ -1,17 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
-define i32 @testmsws(float %x) nounwind {
-; CHECK-LABEL: testmsws:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lroundf # TAILCALL
+; FIXME: crash
+; define i32 @test_lround_i32_f16(half %x) nounwind {
+; %conv = tail call i32 @llvm.lround.i32.f16(half %x)
+; ret i32 %conv
+; }
+
+define i32 @test_lround_i32_f32(float %x) nounwind {
+; X86-LABEL: test_lround_i32_f32:
+; X86: # %bb.0:
+; X86-NEXT: jmp lroundf # TAILCALL
+;
+; X64-LABEL: test_lround_i32_f32:
+; X64: # %bb.0:
+; X64-NEXT: jmp lroundf@PLT # TAILCALL
;
-; GISEL-X86-LABEL: testmsws:
-; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-LABEL: test_lround_i32_f32:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl %eax, (%esp)
@@ -19,28 +29,27 @@ define i32 @testmsws(float %x) nounwind {
; GISEL-X86-NEXT: addl $12, %esp
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmsws:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp lroundf@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsws:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_lround_i32_f32:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
; GISEL-X64-NEXT: callq lroundf
; GISEL-X64-NEXT: popq %rcx
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lround.i32.f32(float %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lround.i32.f32(float %x)
+ ret i32 %conv
}
-define i32 @testmswd(double %x) nounwind {
-; CHECK-LABEL: testmswd:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lround # TAILCALL
+define i32 @test_lround_i32_f64(double %x) nounwind {
+; X86-LABEL: test_lround_i32_f64:
+; X86: # %bb.0:
+; X86-NEXT: jmp lround # TAILCALL
+;
+; X64-LABEL: test_lround_i32_f64:
+; X64: # %bb.0:
+; X64-NEXT: jmp lround@PLT # TAILCALL
;
-; GISEL-X86-LABEL: testmswd:
-; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-LABEL: test_lround_i32_f64:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -53,28 +62,27 @@ define i32 @testmswd(double %x) nounwind {
; GISEL-X86-NEXT: addl $12, %esp
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmswd:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp lround@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmswd:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_lround_i32_f64:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: pushq %rax
; GISEL-X64-NEXT: callq lround
; GISEL-X64-NEXT: popq %rcx
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lround.i32.f64(double %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lround.i32.f64(double %x)
+ ret i32 %conv
}
-define i32 @testmsll(x86_fp80 %x) nounwind {
-; CHECK-LABEL: testmsll:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lroundl # TAILCALL
+define i32 @test_lround_i32_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_lround_i32_f80:
+; X86: # %bb.0:
+; X86-NEXT: jmp lroundl # TAILCALL
+;
+; X64-LABEL: test_lround_i32_f80:
+; X64: # %bb.0:
+; X64-NEXT: jmp lroundl@PLT # TAILCALL
;
-; GISEL-X86-LABEL: testmsll:
-; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-LABEL: test_lround_i32_f80:
+; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $12, %esp
; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp)
; GISEL-X86-NEXT: fstpt (%esp)
@@ -82,19 +90,91 @@ define i32 @testmsll(x86_fp80 %x) nounwind {
; GISEL-X86-NEXT: addl $12, %esp
; GISEL-X86-NEXT: retl
;
-; X64-LABEL: testmsll:
-; X64: # %bb.0: # %entry
-; X64-NEXT: jmp lroundl@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsll:
-; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-LABEL: test_lround_i32_f80:
+; GISEL-X64: # %bb.0:
; GISEL-X64-NEXT: subq $24, %rsp
; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
; GISEL-X64-NEXT: fstpt (%rsp)
; GISEL-X64-NEXT: callq lroundl
; GISEL-X64-NEXT: addq $24, %rsp
; GISEL-X64-NEXT: retq
-entry:
- %0 = tail call i32 @llvm.lround.i32.f80(x86_fp80 %x)
- ret i32 %0
+ %conv = tail call i32 @llvm.lround.i32.f80(x86_fp80 %x)
+ ret i32 %conv
}
+
+define i32 @test_lround_i32_f128(fp128 %x) nounwind {
+; X86-LABEL: test_lround_i32_f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll lroundl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_lround_i32_f128:
+; X64: # %bb.0:
+; X64-NEXT: jmp lroundl@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_lround_i32_f128:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: subl $24, %esp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl %eax, (%esp)
+; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: calll lroundf128
+; GISEL-X86-NEXT: addl $24, %esp
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: retl
+;
+; GISEL-X64-LABEL: test_lround_i32_f128:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: pushq %rax
+; GISEL-X64-NEXT: callq lroundf128
+; GISEL-X64-NEXT: popq %rcx
+; GISEL-X64-NEXT: retq
+ %conv = tail call i32 @llvm.lround.i32.f128(fp128 %x)
+ ret i32 %conv
+}
+
+; FIXME: not yet implemented in global isel
+; define i32 @test_lround_i32_f16_strict(half %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+
+; define i32 @test_lround_i32_f32_strict(float %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+
+; define i32 @test_lround_i32_f64_strict(double %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+
+; define i32 @test_lround_i32_f80_strict(x86_fp80 %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+
+; define i32 @test_lround_i32_f128_strict(fp128 %x) nounwind strictfp {
+; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i32 %conv
+; }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X86-NOSSE: {{.*}}
+; X86-SSE2: {{.*}}
diff --git a/llvm/test/CodeGen/X86/lround-conv-i64.ll b/llvm/test/CodeGen/X86/lround-conv-i64.ll
index 36b86f30ca13..8b8230074728 100644
--- a/llvm/test/CodeGen/X86/lround-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lround-conv-i64.ll
@@ -1,42 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
-define i64 @testmsxs(float %x) {
-; GISEL-X86-LABEL: testmsxs:
+; FIXME: crash
+; define i64 @test_lround_i64_f16(half %x) nounwind {
+; entry:
+; %0 = tail call i64 @llvm.lround.i64.f16(half %x)
+; ret i64 %0
+; }
+
+define i64 @test_lround_i64_f32(float %x) nounwind {
+; X86-NOSSE-LABEL: test_lround_i64_f32:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstps (%esp)
+; X86-NOSSE-NEXT: calll lroundf
+; X86-NOSSE-NEXT: popl %ecx
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lround_i64_f32:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lroundf
+; X86-SSE2-NEXT: popl %ecx
+; X86-SSE2-NEXT: retl
+;
+; X64-LABEL: test_lround_i64_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp lroundf@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_lround_i64_f32:
; GISEL-X86: # %bb.0: # %entry
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl %eax, (%esp)
; GISEL-X86-NEXT: calll lroundf
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; CHECK-LABEL: testmsxs:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lroundf@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsxs:
+; GISEL-X64-LABEL: test_lround_i64_f32:
; GISEL-X64: # %bb.0: # %entry
; GISEL-X64-NEXT: pushq %rax
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 16
; GISEL-X64-NEXT: callq lroundf
; GISEL-X64-NEXT: popq %rcx
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.lround.i64.f32(float %x)
ret i64 %0
}
-define i64 @testmsxd(double %x) {
-; GISEL-X86-LABEL: testmsxd:
+define i64 @test_lround_i64_f64(double %x) nounwind {
+; X86-NOSSE-LABEL: test_lround_i64_f64:
+; X86-NOSSE: # %bb.0: # %entry
+; X86-NOSSE-NEXT: subl $8, %esp
+; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fstpl (%esp)
+; X86-NOSSE-NEXT: calll lround
+; X86-NOSSE-NEXT: addl $8, %esp
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: test_lround_i64_f64:
+; X86-SSE2: # %bb.0: # %entry
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movsd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll lround
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: retl
+;
+; X64-LABEL: test_lround_i64_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp lround@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_lround_i64_f64:
; GISEL-X86: # %bb.0: # %entry
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL-X86-NEXT: movl 4(%eax), %eax
@@ -46,53 +90,131 @@ define i64 @testmsxd(double %x) {
; GISEL-X86-NEXT: movl %eax, 4(%edx)
; GISEL-X86-NEXT: calll lround
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; CHECK-LABEL: testmsxd:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lround@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsxd:
+; GISEL-X64-LABEL: test_lround_i64_f64:
; GISEL-X64: # %bb.0: # %entry
; GISEL-X64-NEXT: pushq %rax
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 16
; GISEL-X64-NEXT: callq lround
; GISEL-X64-NEXT: popq %rcx
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.lround.i64.f64(double %x)
ret i64 %0
}
-define i64 @testmsll(x86_fp80 %x) {
-; GISEL-X86-LABEL: testmsll:
+define i64 @test_lround_i64_f80(x86_fp80 %x) nounwind {
+; X86-LABEL: test_lround_i64_f80:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll lroundl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_lround_i64_f80:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp lroundl@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_lround_i64_f80:
; GISEL-X86: # %bb.0: # %entry
; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 16
; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp)
; GISEL-X86-NEXT: fstpt (%esp)
; GISEL-X86-NEXT: calll lroundl
; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: .cfi_def_cfa_offset 4
; GISEL-X86-NEXT: retl
;
-; CHECK-LABEL: testmsll:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: jmp lroundl@PLT # TAILCALL
-;
-; GISEL-X64-LABEL: testmsll:
+; GISEL-X64-LABEL: test_lround_i64_f80:
; GISEL-X64: # %bb.0: # %entry
; GISEL-X64-NEXT: subq $24, %rsp
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 32
; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
; GISEL-X64-NEXT: fstpt (%rsp)
; GISEL-X64-NEXT: callq lroundl
; GISEL-X64-NEXT: addq $24, %rsp
-; GISEL-X64-NEXT: .cfi_def_cfa_offset 8
; GISEL-X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.lround.i64.f80(x86_fp80 %x)
ret i64 %0
}
+
+define i64 @test_lround_i64_f128(fp128 %x) nounwind {
+; X86-LABEL: test_lround_i64_f128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl 12(%ebp)
+; X86-NEXT: pushl 8(%ebp)
+; X86-NEXT: calll lroundl
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: test_lround_i64_f128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: jmp lroundl@PLT # TAILCALL
+;
+; GISEL-X86-LABEL: test_lround_i64_f128:
+; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: subl $24, %esp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl %eax, (%esp)
+; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: calll lroundf128
+; GISEL-X86-NEXT: addl $24, %esp
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: retl
+;
+; GISEL-X64-LABEL: test_lround_i64_f128:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: pushq %rax
+; GISEL-X64-NEXT: callq lroundf128
+; GISEL-X64-NEXT: popq %rcx
+; GISEL-X64-NEXT: retq
+entry:
+ %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
+ ret i64 %0
+}
+
+; FIXME: not yet implemented in global isel
+; define i64 @test_lround_i64_f16_strict(half %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
+
+; define i64 @test_lround_i64_f32_strict(float %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
+
+; define i64 @test_lround_i64_f64_strict(double %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
+
+; define i64 @test_lround_i64_f80_strict(x86_fp80 %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
+
+; define i64 @test_lround_i64_f128_strict(fp128 %x) nounwind strictfp {
+; entry:
+; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict")
+; ret i64 %0
+; }
diff --git a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
index faa8bff8f094..954985a3798b 100644
--- a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
+++ b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll
@@ -41,9 +41,9 @@ entry:
%add = add nsw i32 %0, %1
ret i32 %add
; CHECK-NOT: retq
-; CHECK: popq %rcx
+; CHECK: popq %rsi
; CHECK-NEXT: lfence
-; CHECK-NEXT: jmpq *%rcx
+; CHECK-NEXT: jmpq *%rsi
}
; Function Attrs: noinline nounwind optnone uwtable
@@ -52,9 +52,9 @@ define dso_local preserve_mostcc void @preserve_most() #0 {
entry:
ret void
; CHECK-NOT: retq
-; CHECK: popq %rax
+; CHECK: popq %r11
; CHECK-NEXT: lfence
-; CHECK-NEXT: jmpq *%rax
+; CHECK-NEXT: jmpq *%r11
}
; Function Attrs: noinline nounwind optnone uwtable
@@ -63,9 +63,9 @@ define dso_local preserve_allcc void @preserve_all() #0 {
entry:
ret void
; CHECK-NOT: retq
-; CHECK: popq %rax
+; CHECK: popq %r11
; CHECK-NEXT: lfence
-; CHECK-NEXT: jmpq *%rax
+; CHECK-NEXT: jmpq *%r11
}
define { i64, i128 } @ret_i64_i128() #0 {
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
index fb2433dbbb1e..7c9adaf31aff 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -730,36 +730,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -767,20 +767,20 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; AVX2-LABEL: vec256_i64_signed_mem_reg:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm0, %ymm0
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -790,36 +790,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4
+; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4
+; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5
; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsllq $32, %xmm0, %xmm0
-; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7
+; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7
; XOP-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
-; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
+; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
@@ -900,36 +900,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpsubq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlq $1, %xmm3, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm2, %xmm9, %xmm2
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm3, %xmm3
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -937,20 +937,20 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; AVX2-LABEL: vec256_i64_signed_reg_mem:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1
-; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -960,36 +960,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm4
+; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm4
+; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm5
; XOP-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; XOP-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; XOP-NEXT: vpsubq %xmm2, %xmm4, %xmm2
-; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm2, %xmm2
+; XOP-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpsubq %xmm3, %xmm1, %xmm3
-; XOP-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; XOP-NEXT: vpsubq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpsubq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpsrlq $1, %xmm3, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm2, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm2, %xmm9, %xmm2
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpsllq $32, %xmm2, %xmm2
-; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; XOP-NEXT: vpsrlq $33, %xmm3, %xmm3
-; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7
+; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7
; XOP-NEXT: vpmuludq %xmm7, %xmm3, %xmm3
-; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
-; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
+; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpsllq $32, %xmm3, %xmm3
-; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
@@ -1071,36 +1071,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -1109,20 +1109,20 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1
-; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1133,36 +1133,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; XOP-NEXT: vmovdqa 16(%rsi), %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4
+; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4
+; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5
; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsllq $32, %xmm0, %xmm0
-; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7
+; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7
; XOP-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
-; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
+; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
index a730ef519c01..a478577155f1 100644
--- a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr)
declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr)
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
index 583e16351652..62613d773a36 100644
--- a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) {
; CHECK-LABEL: test_mm_movrsb_epu8:
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 9e398096bfcc..693d1992091b 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -93,10 +93,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
-; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [715827883,715827883,715827883,715827883]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: p4_vector_urem_by_const__splat:
@@ -104,9 +102,9 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: psrld $1, %xmm0
-; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; SSE4-NEXT: pminud %xmm0, %xmm1
-; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE4-NEXT: movdqa %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX2-LABEL: p4_vector_urem_by_const__splat:
@@ -116,9 +114,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
%t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
%t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 8d155bd57df1..1e3204dfc999 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -16,9 +16,9 @@
; CHECK-NEXT: Target Pass Configuration
; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
+; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
-; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Machine Branch Probability Analysis
diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll
index 52bcbe9f83d7..a4af93b81023 100644
--- a/llvm/test/CodeGen/X86/peep-test-5.ll
+++ b/llvm/test/CodeGen/X86/peep-test-5.ll
@@ -51,3 +51,54 @@ end:
}
declare void @free_object()
+
+; Check TEST instruction would not be combined with CMP.
+define i1 @pr155586(i8 %0) {
+; CHECK-LABEL: pr155586:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb $1, %dil
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: andb %cl, %al
+; CHECK-NEXT: retq
+entry:
+ %cmp88.not = icmp eq i8 %0, 1
+ %1 = and i8 %0, 1
+ %tobool161.not = icmp eq i8 %1, 0
+ %common.ret.op = select i1 %cmp88.not, i1 false, i1 %tobool161.not
+ ret i1 %common.ret.op
+}
+
+; Check TEST8rr instruction would not be combined with TEST8ri.
+define i32 @pr155828() {
+; CHECK-LABEL: pr155828:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB2_1: # %func_188.exit.i.i
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: jne .LBB2_1
+; CHECK-NEXT: # %bb.2: # %if.else.i.i.i
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: retq
+entry:
+ br label %func_188.exit.i.i
+
+func_188.exit.i.i: ; preds = %func_188.exit.i.i, %entry
+ %or659.i167180.i.i = phi i32 [ 0, %entry ], [ 1, %func_188.exit.i.i ]
+ %conv48.i.i = trunc i32 %or659.i167180.i.i to i8
+ %and.i.i.i = and i32 %or659.i167180.i.i, 1
+ %tobool80.not.i.i.i = icmp eq i32 %and.i.i.i, 0
+ br i1 %tobool80.not.i.i.i, label %if.else.i.i.i, label %func_188.exit.i.i
+
+if.else.i.i.i: ; preds = %func_188.exit.i.i
+ %cmp183.i.i.i = icmp sgt i8 %conv48.i.i, 0
+ %ext = zext i1 %cmp183.i.i.i to i32
+ ret i32 %ext
+}
diff --git a/llvm/test/CodeGen/X86/pr156256.ll b/llvm/test/CodeGen/X86/pr156256.ll
new file mode 100644
index 000000000000..13caa6fee587
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr156256.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL
+
+define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) {
+; AVX512-LABEL: PR156256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %zmm0
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: PR156256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: vpmovm2d %k0, %zmm0
+; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+ %icmp = icmp ugt <16 x i32> %a, %b
+ %sext = sext <16 x i1> %icmp to <16 x i16>
+ %and = and <16 x i16> %sext, splat (i16 16256)
+ ret <16 x i16> %and
+}
diff --git a/llvm/test/CodeGen/X86/pr156817.ll b/llvm/test/CodeGen/X86/pr156817.ll
new file mode 100644
index 000000000000..80972ecc5abb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr156817.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -mattr=+egpr | FileCheck %s --check-prefix=EGPR
+
+define coldcc i32 @foo() nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq bar@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: retq
+;
+; EGPR-LABEL: foo:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rax
+; EGPR-NEXT: callq bar@PLT
+; EGPR-NEXT: popq %r16
+; EGPR-NEXT: retq
+ %1 = tail call coldcc i32 @bar()
+ ret i32 %1
+}
+
+declare coldcc i32 @bar()
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index c3c96e822879..6a0c13526ac1 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -260,7 +260,6 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) {
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %ebx, -8
-; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: jmp .LBB1_1
@@ -272,10 +271,9 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) {
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: movl %edx, %ebx
-; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: .LBB1_1: # %for.cond
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: je .LBB1_3
; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: xorl %eax, %eax
@@ -283,12 +281,11 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_3: # %if.end
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB1_4
; CHECK-NEXT: # %bb.9: # %if.then13
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: movl $0, %ebx
; CHECK-NEXT: jne .LBB1_8
diff --git a/llvm/test/CodeGen/X86/pr40289-64bit.ll b/llvm/test/CodeGen/X86/pr40289-64bit.ll
index 58da5258a670..96c8377eb0f0 100644
--- a/llvm/test/CodeGen/X86/pr40289-64bit.ll
+++ b/llvm/test/CodeGen/X86/pr40289-64bit.ll
@@ -6,5 +6,5 @@ define cc 92 < 9 x i64 > @clobber() {
ret < 9 x i64 > undef
; CHECK-LABEL: clobber:
; CHECK-NOT: popq %rsp
- ; CHECK: addq $8, %rsp
+ ; CHECK: popq %rax
}
diff --git a/llvm/test/CodeGen/X86/pr40289.ll b/llvm/test/CodeGen/X86/pr40289.ll
index 851b23c002bd..21e50931b40f 100644
--- a/llvm/test/CodeGen/X86/pr40289.ll
+++ b/llvm/test/CodeGen/X86/pr40289.ll
@@ -6,5 +6,5 @@ define < 3 x i32 > @clobber() {
ret < 3 x i32 > undef
; CHECK-LABEL: clobber:
; CHECK-NOT: popl %esp
- ; CHECK: addl $4, %esp
+ ; CHECK: popl %eax
}
diff --git a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll
index 946380971988..accdd04f084d 100644
--- a/llvm/test/CodeGen/X86/pr67333.ll
+++ b/llvm/test/CodeGen/X86/pr67333.ll
@@ -7,19 +7,25 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 {
; CHECK-LABEL: SHA256_Compress_Generic:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movbel 0, %eax
-; CHECK-NEXT: movbel 12(%rdi), %ecx
+; CHECK-NEXT: movl 0, %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movl 12(%rdi), %ecx
+; CHECK-NEXT: #APP
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovd %eax, %xmm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,0,1,2,3,128,128,128,128,128,128,128,128]
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vpsrld $17, %xmm2, %xmm0
; CHECK-NEXT: vpslld $15, %xmm2, %xmm3
-; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpsrld $19, %xmm2, %xmm3
+; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm3
+; CHECK-NEXT: vpsrld $19, %xmm2, %xmm0
; CHECK-NEXT: vpslld $13, %xmm2, %xmm4
-; CHECK-NEXT: vpor %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm3
-; CHECK-NEXT: vpxor %xmm2, %xmm3, %xmm0
+; CHECK-NEXT: vpor %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm4
; CHECK-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/pr90844.ll b/llvm/test/CodeGen/X86/pr90844.ll
deleted file mode 100644
index b250c3f6f9a2..000000000000
--- a/llvm/test/CodeGen/X86/pr90844.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s
-
-define void @PR90844() {
-; CHECK-LABEL: PR90844:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovaps %xmm0, (%rax)
-; CHECK-NEXT: retq
-entry:
- %0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>)
- %1 = and <2 x i32> %0, <i32 16711935, i32 -134152448>
- %2 = or disjoint <2 x i32> zeroinitializer, %1
- %3 = zext <2 x i32> %2 to <2 x i64>
- %4 = shl nuw <2 x i64> %3, <i64 32, i64 32>
- %5 = or disjoint <2 x i64> %4, zeroinitializer
- store <2 x i64> %5, ptr poison, align 16
- ret void
-}
-
-define void @foo(ptr %0) {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; CHECK-NEXT: vpxor 32(%rdi), %ymm0, %ymm1
-; CHECK-NEXT: vpxor (%rdi), %ymm0, %ymm0
-; CHECK-NEXT: vmovdqa %ymm0, (%rdi)
-; CHECK-NEXT: vmovdqa %ymm1, 32(%rdi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-entry:
- %1 = load <32 x half>, ptr %0
- %2 = fneg <32 x half> %1
- store <32 x half> %2, ptr %0
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9323cd5b1917..7462c7748282 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -938,3 +938,206 @@ define i128 @lshr_shl_mask(i128 %a0) {
%2 = lshr i128 %1, 1
ret i128 %2
}
+
+define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $16, %esp
+; i686-NEXT: movl 32(%ebp), %ebx
+; i686-NEXT: movl 28(%ebp), %edi
+; i686-NEXT: movzbl 40(%ebp), %ecx
+; i686-NEXT: movb $6, %dl
+; i686-NEXT: subb %cl, %dl
+; i686-NEXT: addb $-7, %cl
+; i686-NEXT: movl %edi, %eax
+; i686-NEXT: shrl %eax
+; i686-NEXT: shrl %cl, %eax
+; i686-NEXT: movl %edx, %ecx
+; i686-NEXT: shll %cl, %ebx
+; i686-NEXT: orl %eax, %ebx
+; i686-NEXT: movl 24(%ebp), %esi
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edi
+; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT: movl 8(%ebp), %edi
+; i686-NEXT: movl 36(%ebp), %esi
+; i686-NEXT: movl 32(%ebp), %edx
+; i686-NEXT: shldl %cl, %edx, %esi
+; i686-NEXT: movl %esi, 12(%edi)
+; i686-NEXT: movl %ebx, 8(%edi)
+; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT: movl %ecx, 4(%edi)
+; i686-NEXT: movl %eax, (%edi)
+; i686-NEXT: movl %edi, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movq %rdi, %rax
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_no_nuw:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: movzbl 40(%ebp), %eax
+; i686-NEXT: movl 24(%ebp), %ecx
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; i686-NEXT: movb $6, %cl
+; i686-NEXT: subb %al, %cl
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: movl 24(%ebp), %eax
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl 44(%ebp), %ecx
+; i686-NEXT: subl 40(%ebp), %ecx
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: # kill: def $cl killed $cl killed $ecx
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: subl %edx, %ecx
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 %c, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
index 825a11d66cd4..8d99ad07e22e 100644
--- a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2 | FileCheck %s
define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_int_x86_vsm4key4128:
diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll b/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll
index 4b0f63f9a638..cd576b19f876 100644
--- a/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll
@@ -8,10 +8,10 @@ declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>)
declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>)
-declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
-declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>)
define <4 x i32> @stack_fold_vpdpwssd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: stack_fold_vpdpwssd:
@@ -125,7 +125,7 @@ define <8 x i32> @stack_fold_vpdpwssds_256_commuted(<8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %2
}
-define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -135,11 +135,11 @@ define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a
; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
+ %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2)
ret <4 x i32> %2
}
-define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusd_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -150,11 +150,11 @@ define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4
; CHECK-NEXT: {vex} vpdpbusd %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
+ %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <16 x i8> %a2, <16 x i8> %a1)
ret <4 x i32> %2
}
-define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -164,11 +164,11 @@ define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32
; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
+ %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2)
ret <8 x i32> %2
}
-define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusd_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -179,11 +179,11 @@ define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1,
; CHECK-NEXT: {vex} vpdpbusd %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
+ %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <32 x i8> %a2, <32 x i8> %a1)
ret <8 x i32> %2
}
-define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusds:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -193,11 +193,11 @@ define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %
; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
+ %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2)
ret <4 x i32> %2
}
-define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusds_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -208,11 +208,11 @@ define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4
; CHECK-NEXT: {vex} vpdpbusds %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
+ %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <16 x i8> %a2, <16 x i8> %a1)
ret <4 x i32> %2
}
-define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusds_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -222,11 +222,11 @@ define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i3
; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
+ %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2)
ret <8 x i32> %2
}
-define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: stack_fold_vpdpbusds_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -237,6 +237,6 @@ define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1
; CHECK-NEXT: {vex} vpdpbusds %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
+ %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <32 x i8> %a2, <32 x i8> %a1)
ret <8 x i32> %2
}
diff --git a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
index 35688e59fc9f..766ccdbada53 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
@@ -79,7 +79,7 @@ define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f11:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <2 x double> %a0, i32 0
%cvt = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %ext,
@@ -140,7 +140,7 @@ define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f17:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <4 x float> %a0, i32 0
%cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext,
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 6b8a03ba5eb7..762900e0bb18 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -9,8 +9,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 6fbc10307e0b..0b98a9388adc 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -6,9 +6,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
@@ -118,10 +117,10 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -273,10 +272,10 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -426,10 +425,10 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -680,34 +679,6 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
-; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
-; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0
-; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512VLVBMI2-NEXT: vpsllvw %zmm0, %zmm3, %zmm0
-; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512VLVBMI2-NEXT: retq
-;
-; AVX10_256-LABEL: var_funnnel_v32i8:
-; AVX10_256: # %bb.0:
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
-; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
-; AVX10_256-NEXT: vpsllvw %ymm5, %ymm3, %ymm3
-; AVX10_256-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
-; AVX10_256-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
-; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX10_256-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
-; AVX10_256-NEXT: retq
-;
; XOPAVX1-LABEL: var_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
@@ -840,11 +811,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -957,11 +928,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
; AVX512VLBW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm3[1,3],ymm0[5,7],ymm3[5,7]
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -1078,11 +1049,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1212,17 +1183,17 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v32i8:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX10-NEXT: vpsllw %xmm2, %ymm3, %ymm3
-; AVX10-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX10-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX10-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm3, %ymm3
+; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
; XOPAVX1: # %bb.0:
@@ -1452,25 +1423,25 @@ define void @fancierRotate2(ptr %arr, ptr %control, i32 %rot0, i32 %rot1) {
; AVX512VLBW-NEXT: vzeroupper
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: fancierRotate2:
-; AVX10: # %bb.0: # %entry
-; AVX10-NEXT: vpbroadcastd %edx, %ymm0
-; AVX10-NEXT: vpbroadcastd %ecx, %ymm1
-; AVX10-NEXT: movq $-1024, %rax # imm = 0xFC00
-; AVX10-NEXT: .p2align 4
-; AVX10-NEXT: .LBB8_1: # %loop
-; AVX10-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX10-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; AVX10-NEXT: vptestnmb %xmm2, %xmm2, %k1
-; AVX10-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1}
-; AVX10-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3
-; AVX10-NEXT: vprolvd %ymm2, %ymm3, %ymm2
-; AVX10-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4)
-; AVX10-NEXT: addq $8, %rax
-; AVX10-NEXT: jne .LBB8_1
-; AVX10-NEXT: # %bb.2: # %exit
-; AVX10-NEXT: vzeroupper
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: fancierRotate2:
+; AVX512VLVBMI2: # %bb.0: # %entry
+; AVX512VLVBMI2-NEXT: vpbroadcastd %edx, %ymm0
+; AVX512VLVBMI2-NEXT: vpbroadcastd %ecx, %ymm1
+; AVX512VLVBMI2-NEXT: movq $-1024, %rax # imm = 0xFC00
+; AVX512VLVBMI2-NEXT: .p2align 4
+; AVX512VLVBMI2-NEXT: .LBB8_1: # %loop
+; AVX512VLVBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX512VLVBMI2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512VLVBMI2-NEXT: vptestnmb %xmm2, %xmm2, %k1
+; AVX512VLVBMI2-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512VLVBMI2-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3
+; AVX512VLVBMI2-NEXT: vprolvd %ymm2, %ymm3, %ymm2
+; AVX512VLVBMI2-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4)
+; AVX512VLVBMI2-NEXT: addq $8, %rax
+; AVX512VLVBMI2-NEXT: jne .LBB8_1
+; AVX512VLVBMI2-NEXT: # %bb.2: # %exit
+; AVX512VLVBMI2-NEXT: vzeroupper
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: fancierRotate2:
; XOPAVX1: # %bb.0: # %entry
@@ -1623,10 +1594,10 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -1721,10 +1692,10 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -1824,10 +1795,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1947,28 +1918,6 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
-; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm2
-; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
-; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512VLVBMI2-NEXT: retq
-;
-; AVX10_256-LABEL: constant_funnnel_v32i8:
-; AVX10_256: # %bb.0:
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX10_256-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX10_256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
-; AVX10_256-NEXT: retq
-;
; XOPAVX1-LABEL: constant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -2069,10 +2018,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -2154,10 +2103,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -2239,10 +2188,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -2330,12 +2279,12 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v32i8:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index bf525442a419..20be5791309f 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -9,8 +9,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index b0a1a91bdccc..1f164635910c 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -6,9 +6,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
@@ -118,11 +117,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -274,11 +273,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -454,11 +453,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: var_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -720,20 +719,6 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
-; AVX10_256-LABEL: var_funnnel_v32i8:
-; AVX10_256: # %bb.0:
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
-; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
-; AVX10_256-NEXT: vpsrlvw %ymm5, %ymm3, %ymm3
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
-; AVX10_256-NEXT: vpsrlvw %ymm1, %ymm0, %ymm1
-; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
-; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
-; AVX10_256-NEXT: retq
-;
; XOPAVX1-LABEL: var_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
@@ -870,12 +855,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX10-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -988,12 +973,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
; AVX512VLBW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX10-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -1110,12 +1095,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatvar_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX10-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLVBMI2-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1265,17 +1250,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLVBMI2-NEXT: retq
;
-; AVX10_256-LABEL: splatvar_funnnel_v32i8:
-; AVX10_256: # %bb.0:
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
-; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
-; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
-; AVX10_256-NEXT: retq
-;
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -1388,11 +1362,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -1487,11 +1461,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -1591,11 +1565,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: constant_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX10-NEXT: vmovdqa %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1761,16 +1735,6 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
-; AVX10_256-LABEL: constant_funnnel_v32i8:
-; AVX10_256: # %bb.0:
-; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
-; AVX10_256-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
-; AVX10_256-NEXT: retq
-;
; XOPAVX1-LABEL: constant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -1869,10 +1833,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v4i64:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdq $14, %ymm0, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdq $14, %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
@@ -1954,10 +1918,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v8i32:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdd $4, %ymm0, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdd $4, %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
@@ -2039,10 +2003,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v16i16:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpshrdw $7, %ymm0, %ymm1, %ymm0
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -2130,12 +2094,12 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
; AVX512VLBW-NEXT: retq
;
-; AVX10-LABEL: splatconstant_funnnel_v32i8:
-; AVX10: # %bb.0:
-; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
-; AVX10-NEXT: retq
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2))
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
new file mode 100644
index 000000000000..31949403b446
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @foo(i32 %n) #0 !dbg !4 {
+entry:
+ %diff = alloca i32, align 4
+ %cb = alloca [16 x i8], align 16
+ %cc = alloca [16 x i8], align 16
+ store i32 0, ptr %diff, align 4, !tbaa !11
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv
+ %0 = load i8, ptr %arrayidx, align 1, !tbaa !21
+ %conv = sext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv
+ %1 = load i8, ptr %arrayidx2, align 1, !tbaa !21
+ %conv3 = sext i8 %1 to i32
+ %sub = sub i32 %conv, %conv3
+ %add = add nsw i32 %sub, %add8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25
+
+for.end: ; preds = %for.body
+ store i32 %add, ptr %diff, align 4, !tbaa !11
+ call void @ibar(ptr %diff) #2
+ ret i32 0
+}
+
+declare void @ibar(ptr) #1
+
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+!llvm.dbg.cu = !{!24}
+
+!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !DILocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 17, column: 8, scope: !16)
+!16 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !17)
+!17 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !18)
+!18 = distinct !DILexicalBlock(line: 17, column: 3, file: !1, scope: !4)
+!19 = !DILocation(line: 18, column: 5, scope: !20)
+!20 = distinct !DILexicalBlock(line: 17, column: 27, file: !1, scope: !18)
+!21 = !{!13, !13, i64 0}
+!22 = !DILocation(line: 20, column: 3, scope: !4)
+!23 = !DILocation(line: 21, column: 3, scope: !4)
+!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug)
+!25 = !{!25, !15}
diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
index de76d90bf6b6..474b77665867 100644
--- a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
+++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
@@ -106,7 +106,7 @@ body: |
# RUN: -x86-wineh-unwindv2-force-mode=1 | FileCheck %s \
# RUN: --check-prefix=BESTEFFORT
# DEALLOC-AFTER-EPILOG: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'dealloc_after_epilog':
-# DEALLOC-AFTER-EPILOG-SAME: Unexpected lea, mov or add instruction after the epilog
+# DEALLOC-AFTER-EPILOG-SAME: Unexpected lea or add instruction after the epilog
--- |
define dso_local void @dealloc_after_epilog() local_unnamed_addr {
@@ -161,6 +161,135 @@ body: |
RET64
...
+;--- mov_no_setframe.mir
+# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN: %t/mov_no_setframe.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=MOV-NO-SETFRAME
+# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/mov_no_setframe.mir \
+# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
+# RUN: FileCheck %s --check-prefix=BESTEFFORT
+# MOV-NO-SETFRAME: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_no_setframe':
+# MOV-NO-SETFRAME-SAME: The epilog is setting frame back, but prolog did not set it
+
+--- |
+ define dso_local void @mov_no_setframe() local_unnamed_addr {
+ entry:
+ ret void
+ }
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name: mov_no_setframe
+body: |
+ bb.0.entry:
+ frame-setup SEH_EndPrologue
+ SEH_BeginEpilogue
+ $rsp = MOV64rr $rbp
+ SEH_EndEpilogue
+ RET64
+...
+
+;--- mov_after_epilog.mir
+# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN: %t/mov_after_epilog.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=MOV-AFTER-EPILOG
+# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN: %t/mov_after_epilog.mir -run-pass=x86-wineh-unwindv2 \
+# RUN: -x86-wineh-unwindv2-force-mode=1 | FileCheck %s \
+# RUN: --check-prefix=BESTEFFORT
+# MOV-AFTER-EPILOG: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_after_epilog':
+# MOV-AFTER-EPILOG-SAME: Unexpected mov instruction after the epilog
+
+--- |
+ define dso_local void @mov_after_epilog() local_unnamed_addr {
+ entry:
+ ret void
+ }
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name: mov_after_epilog
+body: |
+ bb.0.entry:
+ $rbp = MOV64rr $rsp
+ frame-setup SEH_SetFrame 52, 0
+ frame-setup SEH_EndPrologue
+ SEH_BeginEpilogue
+ SEH_EndEpilogue
+ $rsp = MOV64rr $rbp
+ RET64
+...
+
+;--- pop_before_mov.mir
+# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN: %t/pop_before_mov.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=POP-BEFORE-MOV
+# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/pop_before_mov.mir \
+# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
+# RUN: FileCheck %s --check-prefix=BESTEFFORT
+# POP-BEFORE-MOV: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'pop_before_mov':
+# POP-BEFORE-MOV-SAME: The epilog is setting the frame back after popping registers
+
+--- |
+ define dso_local void @pop_before_mov() local_unnamed_addr {
+ entry:
+ ret void
+ }
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name: pop_before_mov
+body: |
+ bb.0.entry:
+ frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp
+ frame-setup SEH_PushReg 55
+ $rbp = MOV64rr $rsp
+ frame-setup SEH_SetFrame 52, 0
+ frame-setup SEH_EndPrologue
+ SEH_BeginEpilogue
+ $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+ $rsp = MOV64rr $rbp
+ SEH_EndEpilogue
+ RET64
+...
+
+;--- mov_after_dealloc.mir
+# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN: %t/mov_after_dealloc.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=MOV-AFTER-DEALLOC
+# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/mov_after_dealloc.mir \
+# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
+# RUN: FileCheck %s --check-prefix=BESTEFFORT
+# MOV-AFTER-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_after_dealloc':
+# MOV-AFTER-DEALLOC-SAME: Cannot set the frame back after the stack allocation has been deallocated
+
+--- |
+ define dso_local void @mov_after_dealloc() local_unnamed_addr {
+ entry:
+ ret void
+ }
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name: mov_after_dealloc
+body: |
+ bb.0.entry:
+ $rbp = MOV64rr $rsp
+ frame-setup SEH_SetFrame 52, 0
+ $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags
+ frame-setup SEH_StackAlloc 40
+ frame-setup SEH_EndPrologue
+ SEH_BeginEpilogue
+ $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags
+ $rsp = MOV64rr $rbp
+ SEH_EndEpilogue
+ RET64
+...
+
;--- too_many_pops.mir
# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - %t/too_many_pops.mir \
# RUN: -run-pass=x86-wineh-unwindv2 2>&1 | FileCheck %s \
diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll b/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll
index 326127a919f3..0d92d044e1b9 100644
--- a/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll
+++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll
@@ -171,9 +171,44 @@ define dso_local void @large_aligned_alloc() align 16 {
; CHECK-NEXT: retq
; CHECK-NEXT: .seh_endproc
+define dso_local void @set_frame_only() local_unnamed_addr {
+ tail call i64 @llvm.x86.flags.read.u64()
+ ret void
+}
+
+; CHECK-LABEL: set_frame_only:
+; CHECK: .seh_unwindversion 2
+; CHECK: .seh_pushreg %rbp
+; CHECK: .seh_setframe %rbp, 0
+; CHECK: .seh_endprologue
+; CHECK-NOT: .seh_endproc
+; CHECK: .seh_startepilogue
+; CHECK-NEXT: .seh_unwindv2start
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: retq
+; CHECK-NEXT: .seh_endproc
+
+attributes #1 = { noreturn }
+define dso_local void @no_return_func() local_unnamed_addr #1 {
+entry:
+ call void @d()
+ unreachable
+}
+; CHECK-LABEL: no_return_func:
+; CHECK-NOT: .seh_unwindversion 2
+; CHECK: .seh_stackalloc
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NOT: .seh_startepilogue
+; CHECK-NOT: .seh_unwindv2start
+; CHECK: int3
+; CHECK-NEXT: .seh_endproc
+
+declare i64 @llvm.x86.flags.read.u64()
declare void @a() local_unnamed_addr
declare i32 @b() local_unnamed_addr
declare i32 @c(i32) local_unnamed_addr
+declare void @d() local_unnamed_addr #1
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"winx64-eh-unwindv2", i32 1}
+!0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
diff --git a/llvm/test/CodeGen/X86/xor-not-combine.ll b/llvm/test/CodeGen/X86/xor-not-combine.ll
new file mode 100644
index 000000000000..af65ade35ce8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/xor-not-combine.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; Test for DAG combine: fold (not (sub Y, X)) -> (add X, ~Y)
+; when Y is a constant.
+
+; Test case 1: Y is the constant 100 - should fold to (add X, ~100), i.e. X + (-101)
+define i32 @test_not_sub_constant(i32 %x) {
+; CHECK-LABEL: test_not_sub_constant:
+; CHECK: # %bb.0:
+; CHECK: leal -101(%rdi), %eax
+; CHECK-NEXT: retq
+ %sub = sub i32 100, %x
+ %not = xor i32 %sub, -1
+ ret i32 %not
+}
+
+; Test case 2: Y is not a constant - the fold must NOT apply (sub + not are kept)
+define i32 @test_not_sub_non_constant(i32 %x, i32 %y) {
+; CHECK-LABEL: test_not_sub_non_constant:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: notl %eax
+; CHECK-NEXT: retq
+ %sub = sub i32 %y, %x
+ %not = xor i32 %sub, -1
+ ret i32 %not
+}