diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/X86 | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/test/CodeGen/X86')
107 files changed, 4894 insertions, 1410 deletions
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 8d690ba06e3b..654169377609 100644 --- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -13,25 +13,24 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb.i5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: addl %ebx, %ebx -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %mp_unexp_mp2d.exit.i ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.5: # %cond_next.i -; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # %bb.6: # %cond_next36.i ; CHECK-NEXT: movl $0, 0 -; CHECK-NEXT: movzbl %al, %ebp +; CHECK-NEXT: movzbl %cl, %ebp ; CHECK-NEXT: andl $1, %ebp ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll index bf939c413108..3913e93b83a6 100644 --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -38,7 +38,6 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: ## %bb.1: ## %bb116.i ; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: ## %bb.2: ## %bb52.i.i -; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: ## %bb.3: ## %bb142.i ; CHECK-NEXT: je LBB0_25 @@ -49,23 +48,23 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: 
jmp LBB0_5 ; CHECK-NEXT: LBB0_21: ## %bb7806 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp16: +; CHECK-NEXT: Ltmp16: ## EH_LABEL ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZN12wxStringBase6appendEmw -; CHECK-NEXT: Ltmp17: +; CHECK-NEXT: Ltmp17: ## EH_LABEL ; CHECK-NEXT: LBB0_5: ## %bb3261 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpl $37, 0 ; CHECK-NEXT: jne LBB0_25 ; CHECK-NEXT: ## %bb.6: ## %bb3306 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: Ltmp0: ## EH_LABEL ; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZN12wxStringBaseaSEPKw -; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: Ltmp1: ## EH_LABEL ; CHECK-NEXT: ## %bb.7: ## %bb3314 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: movl 0, %eax @@ -89,11 +88,11 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: je LBB0_14 ; CHECK-NEXT: ## %bb.13: ## %bb155.i8541 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: Ltmp4: ## EH_LABEL ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll _gmtime_r -; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: Ltmp5: ## EH_LABEL ; CHECK-NEXT: LBB0_14: ## %bb182.i8560 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: testb $1, %bl @@ -103,7 +102,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: je LBB0_18 ; CHECK-NEXT: ## %bb.17: ## %bb440.i8663 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: Ltmp6: ## EH_LABEL ; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl L_.str33$non_lazy_ptr, %eax @@ -113,47 +112,47 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: movl %ebp, 
(%esp) ; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5 ; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_ -; CHECK-NEXT: Ltmp7: +; CHECK-NEXT: Ltmp7: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_18 ; CHECK-NEXT: LBB0_15: ## %bb187.i8591 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: jne LBB0_25 ; CHECK-NEXT: LBB0_18: ## %invcont5814 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp8: +; CHECK-NEXT: Ltmp8: ## EH_LABEL ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: Ltmp9: +; CHECK-NEXT: Ltmp9: ## EH_LABEL ; CHECK-NEXT: ## %bb.19: ## %invcont5831 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: Ltmp10: +; CHECK-NEXT: Ltmp10: ## EH_LABEL ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm -; CHECK-NEXT: Ltmp11: +; CHECK-NEXT: Ltmp11: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_5 ; CHECK-NEXT: LBB0_9: ## %bb5657 -; CHECK-NEXT: Ltmp13: +; CHECK-NEXT: Ltmp13: ## EH_LABEL ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE -; CHECK-NEXT: Ltmp14: +; CHECK-NEXT: Ltmp14: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_20: ## %bb5968 -; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: Ltmp2: ## EH_LABEL ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: Ltmp3: ## EH_LABEL ; CHECK-NEXT: LBB0_25: ## %bb115.critedge.i ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl $28, %esp @@ -163,13 +162,13 @@ define void 
@_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl $4 ; CHECK-NEXT: LBB0_23: ## %lpad.loopexit.split-lp -; CHECK-NEXT: Ltmp15: +; CHECK-NEXT: Ltmp15: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_24: ## %lpad8185 -; CHECK-NEXT: Ltmp12: +; CHECK-NEXT: Ltmp12: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_22: ## %lpad.loopexit -; CHECK-NEXT: Ltmp18: +; CHECK-NEXT: Ltmp18: ## EH_LABEL ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: Lfunc_end0: entry: diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll index 320c96535abb..2bda8db04029 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -139,12 +139,12 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind { ; O0-NEXT: callq foo ; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload +; O0-NEXT: movl $32, %esi +; O0-NEXT: movl $buf+2048, %edx ; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: movl $32, %esi -; O0-NEXT: movl $buf+2048, %edx ; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 ; O0-NEXT: movl $64, %esi ; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx diff --git a/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir new file mode 100644 index 000000000000..82049dce8a45 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass="fastpretileconfig,regallocfast,fasttileconfig" -verify-machineinstrs -o - %s | FileCheck %s + +# Test to verify that ldtilecfg instructions are sinked closer to tile 
defining +# instructions after a call. This ensures call does not overwrite values in +# registers being used for configuring the AMX tile. + +... +--- +name: test_api +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64, preferred-register: '', flags: [ ] } + - { id: 1, class: gr64, preferred-register: '', flags: [ ] } + - { id: 2, class: gr64, preferred-register: '', flags: [ ] } + - { id: 3, class: gr64, preferred-register: '', flags: [ ] } + - { id: 4, class: tile, preferred-register: '', flags: [ ] } + - { id: 5, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 6, class: gr64, preferred-register: '', flags: [ ] } + - { id: 9, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 10, class: gr64, preferred-register: '', flags: [ ] } + - { id: 13, class: tile, preferred-register: '', flags: [ ] } + - { id: 14, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 15, class: gr64, preferred-register: '', flags: [ ] } + - { id: 18, class: gr64, preferred-register: '', flags: [ ] } + - { id: 19, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 22, class: gr64, preferred-register: '', flags: [ ] } + - { id: 23, class: gr64, preferred-register: '', flags: [ ] } + - { id: 24, class: gr64, preferred-register: '', flags: [ ] } + - { id: 25, class: tile, preferred-register: '', flags: [ ] } + - { id: 26, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 29, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 30, class: gr64, preferred-register: '', flags: [ ] } + - { id: 33, class: tile, preferred-register: '', flags: [ ] } + - { id: 34, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 35, class: gr64, preferred-register: '', flags: [ ] } + - { id: 38, class: gr64_nosp, preferred-register: '', flags: [ ] } + - { id: 39, class: gr64, preferred-register: '', flags: [ ] } + - { id: 40, class: gr16, preferred-register: '', flags: [ ] } + - { id: 41, 
class: gr16, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%2' } +frameInfo: + adjustsStack: true + maxAlignment: 1024 +stack: + - { id: 0, size: 1024, alignment: 1024 } + - { id: 1, size: 1024, alignment: 1024 } + - { id: 2, size: 32, alignment: 32 } + - { id: 3, size: 32, alignment: 32 } + - { id: 4, size: 8, alignment: 8 } +machineFunctionInfo: + amxProgModel: ManagedRA +body: | + bb.0.entry: + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_api + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $zmm0 = AVX512_512_SET0 + ; CHECK-NEXT: VMOVUPSZmr %stack.5, 1, $noreg, 0, $noreg, killed renamable $zmm0 :: (store (s512) into %stack.5, align 4) + ; CHECK-NEXT: MOV8mi %stack.5, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.5, align 4) + ; CHECK-NEXT: MOV64mr %stack.8, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.8) + ; CHECK-NEXT: renamable $rsi = MOV32ri64 16 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.2, 1, $noreg, 0, $noreg + ; CHECK-NEXT: renamable $cx = MOV16ri 16 + ; CHECK-NEXT: MOV16mr %stack.7, 1, $noreg, 0, $noreg, $cx :: (store (s16) into %stack.7) + ; CHECK-NEXT: renamable $ax = MOV16ri 2 + ; CHECK-NEXT: MOV16mr %stack.6, 1, $noreg, 0, $noreg, $ax :: (store (s16) into %stack.6) + ; CHECK-NEXT: $al = IMPLICIT_DEF + ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4) + ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4) + ; CHECK-NEXT: $al = IMPLICIT_DEF + ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4) + ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def 
dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4) + ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg + ; CHECK-NEXT: renamable $rsi = MOV32ri64 64 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg + ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0 + ; CHECK-NEXT: renamable $rsi = MOV32ri64 64 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.1, 1, $noreg, 0, $noreg + ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg + ; CHECK-NEXT: renamable $rdx = MOV32ri64 16 + ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdi, 1, killed renamable $rdx, 0, $noreg, killed renamable $tmm0 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax + ; CHECK-NEXT: $rsi = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8) + ; CHECK-NEXT: $cx = MOV16rm %stack.7, 1, $noreg, 0, $noreg :: (load (s16) from %stack.7) + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def dead $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: renamable $rdx = COPY $rax + ; CHECK-NEXT: $ax = MOV16rm %stack.6, 1, $noreg, 0, $noreg :: (load (s16) from %stack.6) + ; CHECK-NEXT: MOV64mr killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $rdx + ; CHECK-NEXT: renamable $rdx = MOV64rm %stack.4, 1, $noreg, 0, $noreg + ; CHECK-NEXT: renamable $rsi = MOV32ri64 16 + ; CHECK-NEXT: $al = IMPLICIT_DEF + ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4) + ; 
CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4) + ; CHECK-NEXT: $al = IMPLICIT_DEF + ; CHECK-NEXT: MOV8mr %stack.5, 1, $noreg, 48, $noreg, $al :: (store (s512) into %stack.5 + 48, align 4) + ; CHECK-NEXT: MOV16mr %stack.5, 1, $noreg, 16, $noreg, $cx :: (store (s512) into %stack.5 + 16, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.5, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.5, align 4) + ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg + ; CHECK-NEXT: renamable $rsi = MOV32ri64 64 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: PTILESTOREDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0 + ; CHECK-NEXT: renamable $rsi = MOV32ri64 64 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.0, 1, $noreg, 0, $noreg + ; CHECK-NEXT: renamable $tmm0 = PTILELOADDV renamable $ax, renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg + ; CHECK-NEXT: renamable $rsi = MOV32ri64 16 + ; CHECK-NEXT: renamable $rdx = LEA64r %stack.4, 1, $noreg, 0, $noreg + ; CHECK-NEXT: PTILESTOREDV killed renamable $ax, killed renamable $cx, killed renamable $rdx, 1, killed renamable $rsi, 0, $noreg, killed renamable $tmm0 + ; CHECK-NEXT: RET64 + %2:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %1:gr64 = COPY killed %0 + %3:gr64 = COPY killed %2 + %38:gr64_nosp = MOV32ri64 16 + %39:gr64 = LEA64r %stack.2, 1, $noreg, 0, $noreg + %40:gr16 = MOV16ri 16 + %41:gr16 = MOV16ri 2 + %33:tile = PTILELOADDV %41:gr16, %40:gr16, killed %39, 1, killed %38, 0, $noreg + %34:gr64_nosp = MOV32ri64 64 + %35:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg + PTILESTOREDV 
%41:gr16, %40:gr16, killed %35, 1, killed %34, 0, $noreg, %33 + %29:gr64_nosp = MOV32ri64 64 + %30:gr64 = LEA64r %stack.1, 1, $noreg, 0, $noreg + %25:tile = PTILELOADDV %41:gr16, %40:gr16, killed %30, 1, killed %29, 0, $noreg + %26:gr64_nosp = MOV32ri64 16 + PTILESTOREDV %41:gr16, %40:gr16, %1, 1, killed %26, 0, $noreg, %25 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp + CALL64pcrel32 &foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax + ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp + %24:gr64 = COPY $rax + MOV64mr %3, 1, $noreg, 0, $noreg, %24 + %22:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg + %19:gr64_nosp = MOV32ri64 16 + %13:tile = PTILELOADDV %41:gr16, %40:gr16, %22, 1, killed %19, 0, $noreg + %14:gr64_nosp = MOV32ri64 64 + %15:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + PTILESTOREDV %41:gr16, %40:gr16, killed %15, 1, killed %14, 0, $noreg, %13 + %9:gr64_nosp = MOV32ri64 64 + %10:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg + %4:tile = PTILELOADDV %41:gr16, %40:gr16, killed %10, 1, killed %9, 0, $noreg + %5:gr64_nosp = MOV32ri64 16 + %6:gr64 = LEA64r %stack.4, 1, $noreg, 0, $noreg + PTILESTOREDV %41:gr16, %40:gr16, killed %6, 1, killed %5, 0, $noreg, %4 + RET64 +... 
diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll index 71f8f231747f..885bc805d655 100644 --- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll +++ b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s @buf = dso_local global [3072 x i8] zeroinitializer, align 64 diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll index 8f82bd2587ec..41208d6adb30 100644 --- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding 
-mattr=+amx-tile,+amx-avx512,+avx10.2 | FileCheck %s define <16 x float> @test_tcvtrowd2ps(i32 %A) { ; CHECK-LABEL: test_tcvtrowd2ps: diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll index fd3925fabc51..dc8252ae7aca 100644 --- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll +++ b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2, \ ; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) { diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll index e52ce6ca815b..b2651e91134e 100644 --- a/llvm/test/CodeGen/X86/apx/cf.ll +++ b/llvm/test/CodeGen/X86/apx/cf.ll @@ -229,3 +229,21 @@ entry: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %p, i32 1, <1 x i1> %1) ret void } + +define i64 @redundant_test(i64 %num, ptr %p1, i64 %in) { +; CHECK-LABEL: redundant_test: +; CHECK: # %bb.0: +; CHECK-NEXT: testl $-32, %edi +; CHECK-NEXT: cfcmoveq (%rsi), %rax +; CHECK-NEXT: {nf} addq %rdx, %rax +; CHECK-NEXT: cmovneq %rdi, %rax +; CHECK-NEXT: retq + %and = and i64 %num, 4294967264 + %cmp = icmp eq i64 %and, 0 + %mask = bitcast i1 %cmp to <1 x i1> + %condload = tail call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %p1, i32 8, <1 x i1> %mask, <1 x i64> poison) + %v = bitcast <1 x i64> %condload to i64 + %add = add i64 %v, %in + %sel = select i1 %cmp, i64 %add, i64 %num + ret i64 %sel +} diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll index ad24608d338a..d6d4db350910 100644 --- a/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll +++ 
b/llvm/test/CodeGen/X86/apx/push2-pop2-cfi-seh.ll @@ -81,7 +81,7 @@ define i32 @csr6_alloc16(ptr %argv) { ; LIN-NEXT: .cfi_def_cfa_offset 32 ; LIN-NEXT: pop2 %rbp, %r15 ; LIN-NEXT: .cfi_def_cfa_offset 16 -; LIN-NEXT: popq %rcx +; LIN-NEXT: popq %rax ; LIN-NEXT: .cfi_def_cfa_offset 8 ; LIN-NEXT: retq ; @@ -116,7 +116,7 @@ define i32 @csr6_alloc16(ptr %argv) { ; LIN-PPX-NEXT: .cfi_def_cfa_offset 32 ; LIN-PPX-NEXT: pop2p %rbp, %r15 ; LIN-PPX-NEXT: .cfi_def_cfa_offset 16 -; LIN-PPX-NEXT: popq %rcx +; LIN-PPX-NEXT: popq %rax ; LIN-PPX-NEXT: .cfi_def_cfa_offset 8 ; LIN-PPX-NEXT: retq ; @@ -180,7 +180,7 @@ define i32 @csr6_alloc16(ptr %argv) { ; WIN-NEXT: pop2 %rbp, %rbx ; WIN-NEXT: pop2 %r13, %r12 ; WIN-NEXT: pop2 %r15, %r14 -; WIN-NEXT: popq %rcx +; WIN-NEXT: popq %rax ; WIN-NEXT: .seh_endepilogue ; WIN-NEXT: retq ; WIN-NEXT: .seh_endproc @@ -211,7 +211,7 @@ define i32 @csr6_alloc16(ptr %argv) { ; WIN-PPX-NEXT: pop2p %rbp, %rbx ; WIN-PPX-NEXT: pop2p %r13, %r12 ; WIN-PPX-NEXT: pop2p %r15, %r14 -; WIN-PPX-NEXT: popq %rcx +; WIN-PPX-NEXT: popq %rax ; WIN-PPX-NEXT: .seh_endepilogue ; WIN-PPX-NEXT: retq ; WIN-PPX-NEXT: .seh_endproc diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll index e8866393e8b6..b148cd3d42df 100644 --- a/llvm/test/CodeGen/X86/avg-mask.ll +++ b/llvm/test/CodeGen/X86/avg-mask.ll @@ -177,11 +177,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin ; AVX512F-NEXT: shrq $32, %rdi ; AVX512F-NEXT: shrq $48, %rax ; AVX512F-NEXT: shrl $16, %ecx -; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: kmovw %ecx, 
%k2 ; AVX512F-NEXT: kmovw %eax, %k3 ; AVX512F-NEXT: kmovw %edi, %k4 @@ -364,11 +364,11 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou ; AVX512F: # %bb.0: ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: shrl $16, %edi -; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll index ab8ac4fbd419..b43b1f7b9c32 100644 --- a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll +++ b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 | FileCheck %s +; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2 | FileCheck %s define <8 x bfloat> @fma_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) { ; CHECK-LABEL: fma_123_v8bf16: diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll index 0f90f1a0a356..566ce533683f 100644 --- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefix=X86 define i1 @hoeq(half %x, half %y) { ; X64-LABEL: hoeq: diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll index c22a394e6c4e..79849a7153c9 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define <32 x bfloat> @test_int_x86_avx10_vaddbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; CHECK-LABEL: test_int_x86_avx10_vaddbf16512: diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll index cbac76e9de27..9225bd88b089 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: 
llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 declare <32 x bfloat> @llvm.x86.avx10.vminbf16512(<32 x bfloat>, <32 x bfloat>) diff --git a/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll index c4a904cc3bc4..cc87ae0aad1f 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define <32 x half> @test_int_x86_avx10_vcvt2ps2phx512(<16 x float> %A, <16 x float> %B) { ; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx512: diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll index d7ad7b048c6d..c50da22193b2 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-linux 
-mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 ; VCVTTPD2DQS define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind { diff --git a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll index b7713128f472..c27ee1680dea 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512minmax-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X86 define <32 x bfloat> @test_int_x86_avx10_vminmaxbf16512(<32 x bfloat> %A, <32 x bfloat> %B) nounwind { ; X64-LABEL: test_int_x86_avx10_vminmaxbf16512: diff --git a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll index b2e7caa15944..09eb53faaaad 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 
--show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 ; VNNI FP16 diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll index 8430b2e1c028..2e69b41d282b 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define dso_local <8 x i64> @test_mm512_ipcvtbf16_epi8(<32 x bfloat> noundef %__A) { ; CHECK-LABEL: test_mm512_ipcvtbf16_epi8: diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll index 652c35c77709..591349aabef4 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown 
--show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) { diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll index 435f67a0f1e4..0f2c75b15d5b 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define <16 x bfloat> @test_int_x86_avx10_add_bf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; CHECK-LABEL: test_int_x86_avx10_add_bf16_256: @@ -1168,23 +1168,10 @@ entry: } define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind { -; X64-LABEL: addv: -; X64: # %bb.0: -; X64-NEXT: vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2] -; X64-NEXT: vaddbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb] -; X64-NEXT: retq # encoding: [0xc3] -; -; X86-LABEL: addv: -; X86: # %bb.0: -; X86-NEXT: pushl %ebp # encoding: [0x55] -; X86-NEXT: movl 
%esp, %ebp # encoding: [0x89,0xe5] -; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0] -; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] -; X86-NEXT: vaddbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2] -; X86-NEXT: vaddbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00] -; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-NEXT: popl %ebp # encoding: [0x5d] -; X86-NEXT: retl # encoding: [0xc3] +; CHECK-LABEL: addv: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %add = fadd <32 x bfloat> %a, %b ret <32 x bfloat> %add } diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll index ba32b2adc799..3efc8cc3d129 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 declare <16 x bfloat> @llvm.x86.avx10.vminbf16256(<16 x bfloat>, <16 x bfloat>) diff --git a/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll index 90e2146cc2c0..04c93eb1ee6d 100644 --- a/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll +++ 
b/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define <8 x half> @test_int_x86_avx10_vcvt2ps2phx128(<4 x float> %A, <4 x float> %B) { ; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx128: diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll index a2f167e94cc2..e0c2139b5e37 100644 --- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 ; ; 32-bit float to signed integer diff --git a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll index 916d439ab77f..8ae5b670764e 100644 --- a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=X86 define <8 x bfloat> @test_int_x86_avx10_vminmaxbf16128(<8 x bfloat> %A, <8 x bfloat> %B) nounwind { ; X64-LABEL: test_int_x86_avx10_vminmaxbf16128: diff --git a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll index ed5ae01448c5..0c5fd3bf9d24 100644 --- a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 ; VNNI FP16 diff --git a/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll index 957523f87b7c..094637270503 100644 --- a/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s 
-verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define dso_local <2 x i64> @test_mm_ipcvtbf16_epi8(<8 x bfloat> noundef %__A) { ; CHECK-LABEL: test_mm_ipcvtbf16_epi8: diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll index e9b739074b45..38d54cff6dc2 100644 --- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,X86 define i32 @test_x86_avx512_vcvttsd2usis(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_vcvttsd2usis: diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll index f5be929bc85c..c853da5d2168 100644 --- a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll +++ 
b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s define i64 @test_x86_avx512_vcvttsd2si64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_vcvttsd2si64: diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll index 33819c9e0102..97ca0d88b7d4 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,EVEX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s --check-prefixes=CHECK,EVEX256 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK ; 256-bit @@ -237,32 +236,19 @@ define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) { } define i16 @PR90356(<16 x i1> %a) { -; EVEX512-LABEL: PR90356: -; EVEX512: # %bb.0: -; EVEX512-NEXT: vpsllw $7, %xmm0, %xmm0 -; EVEX512-NEXT: vpmovb2m %xmm0, %k1 -; EVEX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; EVEX512-NEXT: movb $63, %al -; EVEX512-NEXT: kmovd %eax, %k1 -; EVEX512-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} -; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0 -; EVEX512-NEXT: kmovd %k0, %eax -; EVEX512-NEXT: # kill: def $ax killed $ax killed $eax -; EVEX512-NEXT: vzeroupper -; EVEX512-NEXT: retq -; -; EVEX256-LABEL: PR90356: -; EVEX256: # %bb.0: -; EVEX256-NEXT: vpsllw $7, %xmm0, %xmm0 -; EVEX256-NEXT: vpmovb2m %xmm0, %k0 -; EVEX256-NEXT: vpmovm2w 
%k0, %ymm0 -; EVEX256-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; EVEX256-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] -; EVEX256-NEXT: vpmovw2m %ymm0, %k0 -; EVEX256-NEXT: kmovd %k0, %eax -; EVEX256-NEXT: # kill: def $ax killed $ax killed $eax -; EVEX256-NEXT: vzeroupper -; EVEX256-NEXT: retq +; CHECK-LABEL: PR90356: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 +; CHECK-NEXT: vpmovb2m %xmm0, %k1 +; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1 +; CHECK-NEXT: movb $63, %al +; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq %1 = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31> %2 = bitcast <16 x i1> %1 to i16 ret i16 %2 diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll index e449c7192e4b..b60d7a5463d6 100644 --- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll @@ -278,14 +278,14 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4 ret <4 x float> %res } -define <4 x float> @PR98306() { +define <4 x float> @PR98306(i8 %m) { ; CHECK-LABEL: PR98306: ; CHECK: ## %bb.0: -; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7] ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9] ; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 
0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4) + %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 %m, i32 4) ret <4 x float> %res } diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll index a2af7df44010..d09807e4a334 100644 --- a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX102 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind { diff --git a/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll b/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll index 56d923d7c4cf..4a5c1fe5a2a0 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll @@ -57,7 +57,7 @@ define <8 x half> @minsh(<8 x half> %va, ptr %pb) { ; CHECK-LABEL: minsh: ; CHECK: # %bb.0: ; CHECK-NEXT: vminsh (%rdi), %xmm0, %xmm1 -; CHECK-NEXT: 
vmovsh %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] ; CHECK-NEXT: retq %a = extractelement <8 x half> %va, i32 0 %b = load half, ptr %pb @@ -70,7 +70,7 @@ define <8 x half> @maxsh(<8 x half> %va, ptr %pb) { ; CHECK-LABEL: maxsh: ; CHECK: # %bb.0: ; CHECK-NEXT: vminsh (%rdi), %xmm0, %xmm1 -; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] ; CHECK-NEXT: retq %a = extractelement <8 x half> %va, i32 0 %b = load half, ptr %pb diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll index 627a94799424..44ea3ce64ccf 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll @@ -1361,3 +1361,19 @@ define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ret <32 x half> %res } + +define <8 x half> @PR153570(ptr %p) { +; CHECK-LABEL: PR153570: +; CHECK: # %bb.0: +; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; CHECK-NEXT: vmulsh {rn-sae}, %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] +; CHECK-NEXT: vmovaps %xmm1, (%rdi) +; CHECK-NEXT: retq + %r = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> 
<half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 0, i32 8) + store <8 x half> %r, ptr %p, align 16 + %r1 = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 1, i32 8) + ret <8 x half> %r1 +} diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll index 526511c85045..316e3f27a0a1 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll @@ -303,7 +303,7 @@ define <8 x half> @test14(half %x) { ; X64-LABEL: test14: ; X64: # %bb.0: ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64-NEXT: retq ; ; X86-LABEL: test14: @@ -318,7 +318,7 @@ define <16 x half> @test14b(half %x) { ; X64VL-LABEL: test14b: ; X64VL: # %bb.0: ; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64VL-NEXT: retq ; ; X86-LABEL: test14b: @@ -329,7 +329,7 @@ define <16 x half> @test14b(half %x) { ; X64-NOVL-LABEL: test14b: ; X64-NOVL: # %bb.0: ; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; X64-NOVL-NEXT: retq @@ -341,7 +341,7 @@ define 
<32 x half> @test14c(half %x) { ; X64VL-LABEL: test14c: ; X64VL: # %bb.0: ; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64VL-NEXT: retq ; ; X86-LABEL: test14c: @@ -352,7 +352,7 @@ define <32 x half> @test14c(half %x) { ; X64-NOVL-LABEL: test14c: ; X64-NOVL: # %bb.0: ; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0 ; X64-NOVL-NEXT: retq @@ -1464,21 +1464,21 @@ define <8 x half> @movsh(<8 x half> %a, <8 x half> %b) { ; X64VL-LABEL: movsh: ; X64VL: # %bb.0: ; X64VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11] -; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64VL-NEXT: vaddph %xmm0, %xmm2, %xmm0 ; X64VL-NEXT: retq ; ; X86-LABEL: movsh: ; X86: # %bb.0: ; X86-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11] -; X86-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X86-NEXT: vaddph %xmm0, %xmm2, %xmm0 ; X86-NEXT: retl ; ; X64-NOVL-LABEL: movsh: ; X64-NOVL: # %bb.0: ; X64-NOVL-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,0,1,2,3,4,5,6,7,14,15,10,11] -; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm3 +; X64-NOVL-NEXT: vmovsh {{.*#+}} xmm3 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X64-NOVL-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X64-NOVL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X64-NOVL-NEXT: vaddsh %xmm4, %xmm5, %xmm4 @@ -2311,7 +2311,7 @@ define <8 x half> @test21(half %a, half %b, half %c) nounwind { ; X64-LABEL: test21: ; X64: # %bb.0: ; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; 
X64-NEXT: vmovsh %xmm2, %xmm3, %xmm2 +; X64-NEXT: vmovsh {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7] ; X64-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],zero,zero ; X64-NEXT: retq @@ -2427,7 +2427,7 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width ; X64VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X64VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; X64VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64VL-NEXT: vmovsh %xmm0, %xmm2, %xmm0 +; X64VL-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7] ; X64VL-NEXT: retq ; ; X86-LABEL: pr52561: @@ -2443,7 +2443,7 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 ; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7] ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -2474,7 +2474,7 @@ define <8 x i16> @pr59628_xmm(i16 %arg) { ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vpbroadcastw %eax, %xmm1 -; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] ; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1 ; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll index 7613c9ff43e2..b8ebe2a4890a 100644 --- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll @@ -2,18 +2,18 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl 
--show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 -declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>) -define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) ret <8 x i32> %1 } -define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: ; X86: # %bb.0: ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] @@ -33,11 +33,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <8 x i32>, ptr %x2p - %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %x2 = load <32 x i8>, ptr %x2p + %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) %2 = bitcast i8 %x3 to <8 x i1> %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0 - %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> 
%x0, <8 x i32> %x1, <8 x i32> %x4) + %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer %res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 @@ -45,18 +45,18 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ret { <8 x i32>, <8 x i32> } %res2 } -declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>) -define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) ret <4 x i32> %1 } -define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: ; X86: # %bb.0: ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] @@ -76,12 +76,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <4 x i32>, ptr %x2p - 
%1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + %x2 = load <16 x i8>, ptr %x2p + %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) %2 = bitcast i8 %x3 to <8 x i1> %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0 - %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4) + %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4) %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer @@ -90,18 +90,18 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ret { <4 x i32>, <4 x i32> } %res2 } -declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>) -define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) ret <8 x i32> %1 } -define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, ptr %x2p, <32 x i8> %x4, i8 %x3) { ; 
X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: ; X86: # %bb.0: ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] @@ -121,11 +121,11 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <8 x i32>, ptr %x2p - %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %x2 = load <32 x i8>, ptr %x2p + %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) %2 = bitcast i8 %x3 to <8 x i1> %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0 - %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) + %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer %res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 @@ -133,18 +133,18 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ret { <8 x i32>, <8 x i32> } %res2 } -declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>) -define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + 
%1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) ret <4 x i32> %1 } -define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, ptr %x2p, <16 x i8> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: ; X86: # %bb.0: ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] @@ -164,12 +164,12 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32 ; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <4 x i32>, ptr %x2p - %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + %x2 = load <16 x i8>, ptr %x2p + %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) %2 = bitcast i8 %x3 to <8 x i1> %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0 - %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4) + %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x4) %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll index 62c4d39e8615..63ff88a7fa4a 100644 --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll 
@@ -2,20 +2,31 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 -declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) -declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) define <16 x i32>@test_int_x86_avx512_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) ret <16 x i32> %res } -define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_maskz_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { +; X86-LABEL: 
test_int_x86_avx512_maskz_vpdpbusd_512: ; X86: # %bb.0: ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] @@ -25,7 +36,7 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; -; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: +; X64-LABEL: test_int_x86_avx512_maskz_vpdpbusd_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] @@ -41,20 +52,31 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ret { <16 x i32>, <16 x i32> } %res3 } -declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) -declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: +; CHECK: # %bb.0: +; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: 
[0x62,0xf2,0x75,0x48,0x51,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) ret <16 x i32> %res } -define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { -; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_maskz_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { +; X86-LABEL: test_int_x86_avx512_maskz_vpdpbusds_512: ; X86: # %bb.0: ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] @@ -64,7 +86,7 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; -; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: +; X64-LABEL: test_int_x86_avx512_maskz_vpdpbusds_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll index 21d0010ff630..60d0298e057f 100644 --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll @@ -2,18 +2,18 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 -declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>) +declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <64 x i8>, <64 x i8>) -define <16 x i32> 
@test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { +define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_ask_vpdpbusd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) ret <16 x i32> %1 } -define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: ; X86: # %bb.0: ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] @@ -32,11 +32,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda] ; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <16 x i32>, ptr %x2p - %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + %x2 = load <64 x i8>, ptr %x2p + %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) %2 = bitcast i16 %x3 to <16 x i1> %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 - %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) + %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x 
i32> zeroinitializer %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 @@ -44,18 +44,18 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ret { <16 x i32>, <16 x i32> } %res2 } -declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>) +declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <64 x i8>, <64 x i8>) -define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { +define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) { ; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) ret <16 x i32> %1 } -define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <64 x i8> %x1, ptr %x2p, <64 x i8> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: ; X86: # %bb.0: ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] @@ -74,11 +74,11 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda] ; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %x2 = load <16 x i32>, ptr %x2p - %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + %x2 = load <64 x i8>, ptr %x2p + %1 = call <16 x i32> 
@llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x2) %2 = bitcast i16 %x3 to <16 x i1> %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 - %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) + %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <64 x i8> %x1, <64 x i8> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 diff --git a/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll new file mode 100644 index 000000000000..0f4a4f27b971 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx_vnni-intrinsics-upgrade.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s + +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx_vpdpbusd_128: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx_vpdpbusd_256: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + 
%res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx_vpdpbusds_128: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; CHECK-LABEL: test_int_x86_avx_vpdpbusds_256: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll index a1db6e54fa79..de8b2a41bf8c 100644 --- a/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll @@ -4,9 +4,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVXVNNI ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVX512VNNI -declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>) -define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, 
<32 x i8> %x1, <32 x i8> %x2) { ; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_256: ; AVXVNNI: # %bb.0: ; AVXVNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2] @@ -16,13 +16,13 @@ define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 ; AVX512VNNI: # %bb.0: ; AVX512VNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x50,0xc2] ; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) ret <8 x i32> %res } -declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>) -define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) { ; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_128: ; AVXVNNI: # %bb.0: ; AVXVNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2] @@ -32,13 +32,13 @@ define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 ; AVX512VNNI: # %bb.0: ; AVX512VNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x50,0xc2] ; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) ret <4 x i32> %res } -declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>) -define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +define <8 x 
i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) { ; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_256: ; AVXVNNI: # %bb.0: ; AVXVNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2] @@ -48,13 +48,13 @@ define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 ; AVX512VNNI: # %bb.0: ; AVX512VNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x51,0xc2] ; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <32 x i8> %x1, <32 x i8> %x2) ret <8 x i32> %res } -declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>) -define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) { ; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_128: ; AVXVNNI: # %bb.0: ; AVXVNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2] @@ -64,7 +64,7 @@ define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 ; AVX512VNNI: # %bb.0: ; AVX512VNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x51,0xc2] ; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <16 x i8> %x1, <16 x i8> %x2) ret <4 x i32> %res } diff --git a/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll b/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll index 8601d454215a..abdc296ae1e1 100644 --- a/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll +++ 
b/llvm/test/CodeGen/X86/avxvnniint16-intrinsics.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefix=AVX10 -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefix=AVX10 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefix=AVX10 +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2 | FileCheck %s --check-prefix=AVX10 define <4 x i32> @test_int_x86_avx2_vpdpwsud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { ; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_128: diff --git a/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll b/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll index 607720fbc3f3..0ddd0171a58a 100644 --- a/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnniint8 --show-mc-encoding | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X64 +; RUN: llc < %s 
-mtriple=i686-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 --show-mc-encoding | FileCheck %s --check-prefixes=AVX10-X64 declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>) diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll index 423f2c49e70e..474be4465d9b 100644 --- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll +++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll @@ -654,3 +654,110 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) { %2 = sext <64 x i1> %1 to <64 x i8> ret <64 x i8> %2 } + +define <8 x i32> @PR157382(ptr %p0, ptr %p1, ptr %p2) { +; SSE2-SSSE3-LABEL: PR157382: +; SSE2-SSSE3: # %bb.0: +; SSE2-SSSE3-NEXT: movdqu (%rdi), %xmm3 +; SSE2-SSSE3-NEXT: movdqu 16(%rdi), %xmm2 +; SSE2-SSSE3-NEXT: movdqu (%rsi), %xmm0 +; SSE2-SSSE3-NEXT: movdqu 16(%rsi), %xmm4 +; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm5 +; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm7 +; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm6 +; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm8 +; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm8 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 +; SSE2-SSSE3-NEXT: por %xmm6, %xmm0 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm4 +; SSE2-SSSE3-NEXT: por %xmm8, %xmm4 +; SSE2-SSSE3-NEXT: packssdw %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-SSSE3-NEXT: pcmpeqb %xmm5, %xmm1 +; SSE2-SSSE3-NEXT: pxor %xmm7, %xmm1 +; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 +; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 +; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} 
xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSE2-SSSE3-NEXT: pslld $31, %xmm1 +; SSE2-SSSE3-NEXT: psrad $31, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: retq +; +; AVX1-LABEL: PR157382: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 +; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpcmpgtd %xmm5, %xmm2, %xmm5 +; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vmovdqu (%rsi), %xmm6 +; AVX1-NEXT: vmovdqu 16(%rsi), %xmm7 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm6 +; AVX1-NEXT: vpor %xmm6, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm7, %xmm6 +; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 +; AVX1-NEXT: vpackssdw %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1 +; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpmovsxwd %xmm1, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR157382: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; AVX2-NEXT: vmovdqu (%rsi), %ymm1 +; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtd %ymm0, %ymm3, %ymm4 +; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 +; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4 +; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpor %ymm1, %ymm4, %ymm1 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2 +; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; 
AVX512-LABEL: PR157382: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpcmpnltd %ymm2, %ymm0, %k0 +; AVX512-NEXT: vpcmpltd (%rsi), %ymm2, %k1 +; AVX512-NEXT: vptestmb %xmm1, %xmm1, %k2 +; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: korw %k2, %k0, %k1 +; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: retq + %ld0 = load <8 x i32>, ptr %p0, align 1 + %ld1 = load <8 x i32>, ptr %p1, align 1 + %ld2 = load <8 x i8>, ptr %p2, align 1 + %cmp0 = icmp sge <8 x i32> %ld0, zeroinitializer + %cmp1 = icmp sgt <8 x i32> %ld1, zeroinitializer + %cmp2 = icmp ne <8 x i8> %ld2, zeroinitializer + %cmp01 = or <8 x i1> %cmp0, %cmp1 + %cmp012 = or <8 x i1> %cmp01, %cmp2 + %res = select <8 x i1> %cmp012, <8 x i32> %ld0, <8 x i32> zeroinitializer + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/X86/bswap-inline-asm.ll b/llvm/test/CodeGen/X86/bswap-inline-asm.ll index f8f154c0688f..a9ce616b7ecc 100644 --- a/llvm/test/CodeGen/X86/bswap-inline-asm.ll +++ b/llvm/test/CodeGen/X86/bswap-inline-asm.ll @@ -1,88 +1,150 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix CHK %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -; CHK-NOT: InlineAsm +; bswap inline assembly should be preserved as-is. 
-; CHECK-LABEL: foo: -; CHECK: bswapq define i64 @foo(i64 %x) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } -; CHECK-LABEL: bar: -; CHECK: bswapq define i64 @bar(i64 %x) nounwind { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } -; CHECK-LABEL: pen: -; CHECK: bswapl define i32 @pen(i32 %x) nounwind { - %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind +; CHECK-LABEL: pen: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq + %asmtmp = tail call i32 asm "bswapl ${0:k}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind ret i32 %asmtmp } -; CHECK-LABEL: s16: -; CHECK: rolw $8, define zeroext i16 @s16(i16 zeroext %x) nounwind { +; CHECK-LABEL: s16: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: rorw $8, %di +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: retq %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind ret i16 %asmtmp } -; CHECK-LABEL: t16: -; CHECK: rolw $8, define zeroext i16 @t16(i16 zeroext %x) nounwind { +; CHECK-LABEL: t16: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: rorw $8, %di +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: retq %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) 
nounwind ret i16 %asmtmp } -; CHECK-LABEL: u16: -; CHECK: rolw $8, define zeroext i16 @u16(i16 zeroext %x) nounwind { +; CHECK-LABEL: u16: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: rolw $8, %di +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: retq %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind ret i16 %asmtmp } -; CHECK-LABEL: v16: -; CHECK: rolw $8, define zeroext i16 @v16(i16 zeroext %x) nounwind { +; CHECK-LABEL: v16: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: rolw $8, %di +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: retq %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind ret i16 %asmtmp } -; CHECK-LABEL: s32: -; CHECK: bswapl define i32 @s32(i32 %x) nounwind { +; CHECK-LABEL: s32: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind ret i32 %asmtmp } -; CHECK-LABEL: t32: -; CHECK: bswapl define i32 @t32(i32 %x) nounwind { +; CHECK-LABEL: t32: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind ret i32 %asmtmp } -; CHECK-LABEL: u32: -; CHECK: bswapl define i32 @u32(i32 %x) nounwind { +; CHECK-LABEL: u32: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: rorw $8, %ax +; CHECK-NEXT: rorl $16, %eax +; CHECK-NEXT: rorw $8, %ax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", 
"=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind ret i32 %asmtmp } -; CHECK-LABEL: s64: -; CHECK: bswapq define i64 @s64(i64 %x) nounwind { +; CHECK-LABEL: s64: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } -; CHECK-LABEL: t64: -; CHECK: bswapq define i64 @t64(i64 %x) nounwind { +; CHECK-LABEL: t64: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind ret i64 %asmtmp } diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll index 4a9840eac489..66d009cf1221 100644 --- a/llvm/test/CodeGen/X86/call-graph-section.ll +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -11,14 +11,12 @@ declare !type !2 ptr @baz(ptr) define void @main() { entry: - %a = alloca i8, align 1 %fp_foo_val = load ptr, ptr null, align 8 call void (...) 
%fp_foo_val(), !callee_type !1 %fp_bar_val = load ptr, ptr null, align 8 - %param = trunc i64 0 to i8 - %call_fp_bar = call i32 %fp_bar_val(i8 signext %param), !callee_type !3 + %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !3 %fp_baz_val = load ptr, ptr null, align 8 - %call_fp_baz = call ptr %fp_baz_val(ptr %a), !callee_type !4 + %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 ret void } diff --git a/llvm/test/CodeGen/X86/combine-gfni.ll b/llvm/test/CodeGen/X86/combine-gfni.ll new file mode 100644 index 000000000000..b105cdf7ea89 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-gfni.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512 + +define <16 x i8> @gf2p8affineqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { +; SSE-LABEL: gf2p8affineqb_freeze: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm3, %xmm3 +; SSE-NEXT: pcmpgtb %xmm2, %xmm3 +; SSE-NEXT: gf2p8affineqb $11, %xmm1, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pandn %xmm0, %xmm3 +; SSE-NEXT: por %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: gf2p8affineqb_freeze: +; AVX: # %bb.0: +; AVX-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: gf2p8affineqb_freeze: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovb2m %xmm2, %k1 +; AVX512-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm0 {%k1} +; AVX512-NEXT: retq + %i = icmp slt <16 x i8> %a2, zeroinitializer + %g = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11) + %f = freeze <16 x i8> %g + %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 + ret <16 x i8> %r +} + 
+define <16 x i8> @gf2p8affineinvqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { +; SSE-LABEL: gf2p8affineinvqb_freeze: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm3, %xmm3 +; SSE-NEXT: pcmpgtb %xmm2, %xmm3 +; SSE-NEXT: gf2p8affineinvqb $11, %xmm1, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pandn %xmm0, %xmm3 +; SSE-NEXT: por %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: gf2p8affineinvqb_freeze: +; AVX: # %bb.0: +; AVX-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: gf2p8affineinvqb_freeze: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovb2m %xmm2, %k1 +; AVX512-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm0 {%k1} +; AVX512-NEXT: retq + %i = icmp slt <16 x i8> %a2, zeroinitializer + %g = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11) + %f = freeze <16 x i8> %g + %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 + ret <16 x i8> %r +} + +define <16 x i8> @gf2p8mulb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { +; SSE-LABEL: gf2p8mulb_freeze: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm3, %xmm3 +; SSE-NEXT: pcmpgtb %xmm2, %xmm3 +; SSE-NEXT: gf2p8mulb %xmm1, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pandn %xmm0, %xmm3 +; SSE-NEXT: por %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: gf2p8mulb_freeze: +; AVX: # %bb.0: +; AVX-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: gf2p8mulb_freeze: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovb2m %xmm2, %k1 +; AVX512-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm0 {%k1} +; AVX512-NEXT: retq + %i = icmp slt <16 x i8> %a2, zeroinitializer + %g = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %a1, <16 x i8> %a1) + %f = freeze <16 x i8> %g + %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 + ret <16 x i8> %r +} + +declare <16 x i8> 
@llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8) +declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8) +declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8) +declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8) +declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll new file mode 100644 index 000000000000..2cb060ea92b1 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll @@ -0,0 +1,400 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX + +define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test1_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test1_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test2_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test2_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and) + ret <2 x 
i64> %1 +} + +define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_bits: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_bits: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1 + %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_op: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_op: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = 
and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52h: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +; Test the fold x * 0 + y -> y +define <2 x i64> @test_vpmadd52l_mul_zero(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_zero(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52h_mul_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_commuted(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 0, i64 0>) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_both(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_both: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + ret <2 x 
i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_in_52bits(<2 x i64> %x0, <2 x i64> %x1) { +; CHECK-LABEL: test_vpmadd52l_mul_zero_in_52bits: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + + ; mul by (1 << 52) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 4503599627370496), <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_add_zero(<2 x i64> %x0, <2 x i64> %x1) { +; AVX512-LABEL: test_vpmadd52l_add_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vmovdqa %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_add_zero: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa %xmm2, %xmm0 +; AVX-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> <i64 0, i64 0>, <2 x i64> %x0, <2 x i64> %x1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_zero_scalar(<2 x i64> %x0, <2 x i64> %x1) { +; AVX512-LABEL: test_vpmadd52l_mul_zero_scalar: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_zero_scalar: +; AVX: # %bb.0: +; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX-NEXT: retq + + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 123>, <2 x i64> %x1) + ret <2 x i64> %1 +} + +; (1 << 51) * (1 << 1) -> 1 << 52 -> low 52 bits are zeroes +define <2 x i64> @test_vpmadd52l_mul_lo52_zero(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52l_mul_lo52_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2)) + ret <2 x i64> %1 +} + +; (1 << 25) * (1 << 26) = 1 << 51 -> high 52 bits are zeroes +define <2 x i64> 
@test_vpmadd52h_mul_hi52_zero(<2 x i64> %x0) { +; CHECK-LABEL: test_vpmadd52h_mul_hi52_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 33554432), <2 x i64> splat (i64 67108864)) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_const(<2 x i64> %x0) { +; AVX512-LABEL: test_vpmadd52l_mul_lo52_const: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_lo52_const: +; AVX: # %bb.0: +; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 123), <2 x i64> splat (i64 456)) + ret <2 x i64> %1 +} + +; (1 << 51) * (1 << 51) -> 1 << 102 -> the high 52 bits is 1 << 50 +define <2 x i64> @test_vpmadd52h_mul_hi52_const(<2 x i64> %x0) { +; AVX512-LABEL: test_vpmadd52h_mul_hi52_const: +; AVX512: # %bb.0: +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h_mul_hi52_const: +; AVX: # %bb.0: +; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 2251799813685248), <2 x i64> splat (i64 2251799813685248)) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test_vpmadd52l_mul_lo52_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_hi52_mask(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: 
test_vpmadd52h_mul_hi52_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %and1 = lshr <2 x i64> %x0, splat (i64 40) + %and2 = lshr <2 x i64> %x1, splat (i64 40) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_mul_lo52_mask_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_mul_lo52_mask_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_mul_lo52_mask_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 2097152) ; 1LL << 21 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52h_mul_hi52_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlq $30, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $43, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52huq %xmm1, %xmm2, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h_mul_hi52_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpsrlq $30, %xmm0, %xmm2 +; AVX-NEXT: vpsrlq $43, %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq + %and1 = lshr <2 x i64> %x0, splat (i64 30) + %and2 = lshr <2 x i64> %x1, splat (i64 43) + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2) + ret <2 x i64> %1 +} + +define <2 x i64> 
@test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test1_knownbits_vpmadd52l: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 4) + %and2 = and <2 x i64> %x1, splat (i64 4) + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test1_knownbits_vpmadd52h: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 3) + ret <2 x i64> %ret +} + +define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test2_knownbits_vpmadd52l: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1234,1234] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26 + %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1234) + ret <2 x i64> %ret +} + +define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; CHECK-LABEL: test2_knownbits_vpmadd52h: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30 + %and2 = and <2 x i64> %x1, 
splat (i64 1073741824) ; 1LL << 30 + ; add (1LL << 10) + 1 + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_knownbits_vpmadd52l_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX512-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovdqa %xmm2, %xmm3 +; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm3 +; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_knownbits_vpmadd52l_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa %xmm2, %xmm3 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1 + %or = or <2 x i64> %x1, splat (i64 1) + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} + +define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_knownbits_vpmadd52h_negative: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX512-NEXT: vmovdqa %xmm2, %xmm3 +; AVX512-NEXT: vpmadd52huq %xmm1, %xmm0, %xmm3 +; AVX512-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_knownbits_vpmadd52h_negative: +; AVX: # %bb.0: +; AVX-NEXT: vpand
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vmovdqa %xmm2, %xmm3 +; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm0, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm3, %xmm0 +; AVX-NEXT: retq + %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22 + %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30 + ; add 1; the high 52 bits of the product may be 1, so bit 0 of the sum is unknown + %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2) + %ret = and <2 x i64> %madd, splat (i64 1) + ret <2 x i64> %ret +} diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll index 6f520aa57dcd..805b1b54d5b6 100644 --- a/llvm/test/CodeGen/X86/comi-flags.ll +++ b/llvm/test/CodeGen/X86/comi-flags.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX,AVX10_2 ; ; SSE diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll deleted file mode 100644 index 85bb3b3a5487..000000000000 --- a/llvm/test/CodeGen/X86/evex512-mem.ll +++ /dev/null @@ -1,29 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256 - -define void @test1() { -; AVX512-LABEL: test1: -; AVX512: # %bb.0: -; AVX512-NEXT: movq 64, %rax -; AVX512-NEXT: movq %rax, (%rax) -;
AVX512-NEXT: vmovups 0, %zmm0 -; AVX512-NEXT: vmovups %zmm0, (%rax) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq -; -; AVX256-LABEL: test1: -; AVX256: # %bb.0: -; AVX256-NEXT: movq 64, %rax -; AVX256-NEXT: movq %rax, (%rax) -; AVX256-NEXT: vmovups 0, %ymm0 -; AVX256-NEXT: vmovups 32, %ymm1 -; AVX256-NEXT: vmovups %ymm1, (%rax) -; AVX256-NEXT: vmovups %ymm0, (%rax) -; AVX256-NEXT: vzeroupper -; AVX256-NEXT: retq - call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false) - ret void -} - -declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) diff --git a/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll b/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll new file mode 100644 index 000000000000..a155d125a6d1 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-large-fp-optnone.ll @@ -0,0 +1,252 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s + +; expand-fp must also run with optnone + +; Function Attrs: noinline optnone +define double @main(i224 %0) #0 { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entryitofp-entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: subq $88, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: orq %rdx, %rax +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: movq %rsi, %r9 +; CHECK-NEXT: orq %r8, %r9 +; 
CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: orq %r9, %rax +; CHECK-NEXT: je .LBB0_10 +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_1: # %itofp-if-end +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: movq %rax, %r9 +; CHECK-NEXT: sarq $31, %r9 +; CHECK-NEXT: sarq $63, %rax +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: xorq %rax, %rdx +; CHECK-NEXT: xorq %rax, %rsi +; CHECK-NEXT: xorq %r9, %rdi +; CHECK-NEXT: subq %r9, %rdi +; CHECK-NEXT: sbbq %rax, %rsi +; CHECK-NEXT: sbbq %rax, %rdx +; CHECK-NEXT: sbbq %rax, %rcx +; CHECK-NEXT: movq %rcx, %r8 +; CHECK-NEXT: shldq $32, %rdx, %r8 +; CHECK-NEXT: bsrq %r8, %rax +; CHECK-NEXT: xorl $63, %eax +; CHECK-NEXT: movq %rdx, %r10 +; CHECK-NEXT: shldq $32, %rsi, %r10 +; CHECK-NEXT: bsrq %r10, %r11 +; CHECK-NEXT: xorl $63, %r11d +; CHECK-NEXT: orl $64, %r11d +; CHECK-NEXT: testq %r8, %r8 +; CHECK-NEXT: cmovnel %eax, %r11d +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: shldq $32, %rdi, %rbx +; CHECK-NEXT: bsrq %rbx, %r14 +; CHECK-NEXT: xorl $63, %r14d +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq $32, %rax +; CHECK-NEXT: bsrq %rax, %rax +; CHECK-NEXT: xorl $63, %eax +; CHECK-NEXT: orl $64, %eax +; CHECK-NEXT: testq %rbx, %rbx +; CHECK-NEXT: cmovnel %r14d, %eax +; CHECK-NEXT: subl $-128, %eax +; CHECK-NEXT: orq %r8, %r10 +; CHECK-NEXT: cmovnel %r11d, %eax +; CHECK-NEXT: movl $224, %r11d +; CHECK-NEXT: subl %eax, %r11d +; CHECK-NEXT: movl $223, %r10d +; CHECK-NEXT: subl %eax, %r10d +; CHECK-NEXT: cmpl $53, %r11d +; CHECK-NEXT: jle .LBB0_8 +; CHECK-NEXT: # %bb.2: # %itofp-if-then4 +; CHECK-NEXT: movl %r11d, %r8d +; CHECK-NEXT: subl $54, %r8d +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_3: # %itofp-if-then4 +; CHECK-NEXT: movl %r11d, %r8d +; CHECK-NEXT: subl $55, %r8d +; CHECK-NEXT: jne .LBB0_5 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .LBB0_4: # %itofp-sw-bb +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shldq $1, %rdi, %rax +; CHECK-NEXT: movq %rdx, %r8 +; CHECK-NEXT: shldq 
$1, %rsi, %r8 +; CHECK-NEXT: shldq $1, %rdx, %rcx +; CHECK-NEXT: addq %rdi, %rdi +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: movq %r8, %rdx +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .LBB0_5: # %itofp-sw-default +; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movb $-87, %r8b +; CHECK-NEXT: subb %al, %r8b +; CHECK-NEXT: movb %r8b, %bl +; CHECK-NEXT: shrb $6, %bl +; CHECK-NEXT: movzbl %bl, %r12d +; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq -24(%rsp,%r12,8), %rbx +; CHECK-NEXT: movq -32(%rsp,%r12,8), %r13 +; CHECK-NEXT: movq %rcx, %rbp +; CHECK-NEXT: movb %r8b, %cl +; CHECK-NEXT: movq %r13, %r14 +; CHECK-NEXT: shrdq %cl, %rbx, %r14 +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq -48(%rsp,%r12,8), %r15 +; CHECK-NEXT: movq -40(%rsp,%r12,8), %r12 +; CHECK-NEXT: movb %r8b, %cl +; CHECK-NEXT: movq %r12, %r14 +; CHECK-NEXT: shrdq %cl, %r13, %r14 +; CHECK-NEXT: movb %r8b, %cl +; CHECK-NEXT: shrq %cl, %rbx +; CHECK-NEXT: movb %r8b, %cl +; CHECK-NEXT: shrdq %cl, %r12, %r15 +; CHECK-NEXT: addb $55, %al +; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: shrb $3, %cl +; CHECK-NEXT: andb $24, %cl +; CHECK-NEXT: negb %cl +; CHECK-NEXT: movsbq %cl, %rdx +; CHECK-NEXT: movq -80(%rsp,%rdx), %rsi +; CHECK-NEXT: movq -72(%rsp,%rdx), %rdi +; CHECK-NEXT: movq -64(%rsp,%rdx), %r8 +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: 
movq %r8, %r12 +; CHECK-NEXT: shldq %cl, %rdi, %r12 +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: movq %rsi, %r13 +; CHECK-NEXT: shlq %cl, %r13 +; CHECK-NEXT: orq %r12, %r13 +; CHECK-NEXT: movq -56(%rsp,%rdx), %rdx +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: shldq %cl, %r8, %rdx +; CHECK-NEXT: movl %edx, %edx +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: orq %rdx, %rdi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: orq %rdi, %r13 +; CHECK-NEXT: setne %al +; CHECK-NEXT: orq %rax, %r15 +; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-NEXT: movq %rbx, %rcx +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .LBB0_6: # %itofp-sw-epilog +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: orq %rax, %rdi +; CHECK-NEXT: addq $1, %rdi +; CHECK-NEXT: adcq $0, %rsi +; CHECK-NEXT: adcq $0, %rdx +; CHECK-NEXT: adcq $0, %rcx +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: shldq $62, %rdi, %rdx +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: btq $55, %rdi +; CHECK-NEXT: jae .LBB0_9 +; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: .LBB0_7: # %itofp-if-then20 +; CHECK-NEXT: shldq $61, %rdi, %rsi +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: movl %r11d, %r10d +; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: .LBB0_8: # %itofp-if-else +; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: addb $85, %al +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: shrb $3, %cl +; CHECK-NEXT: andb $24, %cl +; CHECK-NEXT: negb %cl +; CHECK-NEXT: movsbq %cl, %rcx +; CHECK-NEXT: movq 
48(%rsp,%rcx), %rdx +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: .LBB0_9: # %itofp-if-end26 +; CHECK-NEXT: andl $-2147483648, %r9d # imm = 0x80000000 +; CHECK-NEXT: shll $20, %r10d +; CHECK-NEXT: addl $1072693248, %r10d # imm = 0x3FF00000 +; CHECK-NEXT: andl $1048575, %eax # imm = 0xFFFFF +; CHECK-NEXT: orl %r9d, %eax +; CHECK-NEXT: orl %r10d, %eax +; CHECK-NEXT: movl %eax, %eax +; CHECK-NEXT: shlq $32, %rax +; CHECK-NEXT: movabsq $4294967295, %rcx # imm = 0xFFFFFFFF +; CHECK-NEXT: andq %rcx, %rdx +; CHECK-NEXT: orq %rdx, %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: .LBB0_10: # %itofp-return +; CHECK-NEXT: addq $88, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %x = sitofp i224 %0 to double + ret double %x +} + +attributes #0 = { noinline optnone } diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 989aabc9e87b..864c2336f37c 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2 +; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX10_2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86 declare float @llvm.maximum.f32(float, float) diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll index eef87b5a9f85..54d82b0c1c92 100644 --- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll +++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefixes=AVX10_2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86 declare float @llvm.maximumnum.f32(float, float) diff --git a/llvm/test/CodeGen/X86/fp16-reload.mir b/llvm/test/CodeGen/X86/fp16-reload.mir new file mode 100644 index 000000000000..ddbd48cbf3ee --- /dev/null +++ b/llvm/test/CodeGen/X86/fp16-reload.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-unknown -start-before=twoaddressinstruction -stop-after=postrapseudos -verify-machineinstrs -o - %s | FileCheck %s + +... 
+--- +name: test +alignment: 16 +tracksRegLiveness: true +debugInstrRef: true +registers: +liveins: + - { reg: '$xmm0', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $xmm0 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $xmm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOVSSmr $rsp, 1, $noreg, -4, $noreg, $xmm0 :: (store (s32) into %stack.0, align 2) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + ; CHECK-NEXT: renamable $xmm0 = MOVSSrm $rsp, 1, $noreg, -4, $noreg :: (load (s32) from %stack.0, align 2) + ; CHECK-NEXT: FNOP implicit-def $fpsw, implicit killed renamable $xmm0 + ; CHECK-NEXT: RET 0 + %0:fr16 = COPY killed $xmm0 + INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def 
dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + FNOP implicit-def $fpsw, implicit %0:fr16 + RET 0 + +... diff --git a/llvm/test/CodeGen/X86/fp16-spill.ll b/llvm/test/CodeGen/X86/fp16-spill.ll new file mode 100644 index 000000000000..6161009b6f56 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp16-spill.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512 + +define half @test(float %f, ptr %p) nounwind { +; SSE2-LABEL: test: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: subq $16, %rsp +; SSE2-NEXT: movq %rdi, %rbx +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: movss 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: #APP +; SSE2-NEXT: #NO_APP +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss %xmm0, (%rbx) +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: addq $16, %rsp +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: retq +; +; AVX-LABEL: test: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: subq $16, %rsp +; AVX-NEXT: movq %rdi, %rbx +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss %xmm0, (%rbx) +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: addq $16, %rsp +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq +; +; AVX512-LABEL: test: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: #APP +; AVX512-NEXT: #NO_APP +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vmovss %xmm0, (%rdi) +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: retq + %t = fptrunc float %f to half + %t2 = fpext half %t to float + tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"() + store float %t2, ptr %p + ret half %t +} diff --git a/llvm/test/CodeGen/X86/fpenv.ll 
b/llvm/test/CodeGen/X86/fpenv.ll index c79e19f07cda..77eaaa1ca08d 100644 --- a/llvm/test/CodeGen/X86/fpenv.ll +++ b/llvm/test/CodeGen/X86/fpenv.ll @@ -11,244 +11,6 @@ declare i32 @llvm.get.fpmode.i32() declare void @llvm.set.fpmode.i32(i32 %fpmode) declare void @llvm.reset.fpmode() -define void @func_01() nounwind { -; X86-NOSSE-LABEL: func_01: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: fnstcw (%esp) -; X86-NOSSE-NEXT: orb $12, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldcw (%esp) -; X86-NOSSE-NEXT: popl %eax -; X86-NOSSE-NEXT: retl -; -; X86-SSE-LABEL: func_01: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: fnstcw (%esp) -; X86-SSE-NEXT: orb $12, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fldcw (%esp) -; X86-SSE-NEXT: stmxcsr (%esp) -; X86-SSE-NEXT: orb $96, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: ldmxcsr (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl -; -; X64-LABEL: func_01: -; X64: # %bb.0: -; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: orb $12, -{{[0-9]+}}(%rsp) -; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: orb $96, -{{[0-9]+}}(%rsp) -; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: retq - call void @llvm.set.rounding(i32 0) ; TowardZero (CW[11-10] = 11) - ret void -} - -define void @func_02() nounwind { -; X86-NOSSE-LABEL: func_02: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: fnstcw (%esp) -; X86-NOSSE-NEXT: andb $-13, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldcw (%esp) -; X86-NOSSE-NEXT: popl %eax -; X86-NOSSE-NEXT: retl -; -; X86-SSE-LABEL: func_02: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: fnstcw (%esp) -; X86-SSE-NEXT: andb $-13, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fldcw (%esp) -; X86-SSE-NEXT: stmxcsr (%esp) -; X86-SSE-NEXT: andb $-97, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: ldmxcsr (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl -; -; X64-LABEL: func_02: -; X64: # %bb.0: -; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: 
andb $-13, -{{[0-9]+}}(%rsp) -; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: andb $-97, -{{[0-9]+}}(%rsp) -; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: retq - call void @llvm.set.rounding(i32 1) ; ToNearestTiesToEven (CW[11-10] = 00) - ret void -} - -define void @func_03() nounwind { -; X86-NOSSE-LABEL: func_03: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: fnstcw (%esp) -; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X86-NOSSE-NEXT: andl (%esp), %eax -; X86-NOSSE-NEXT: orl $2048, %eax # imm = 0x800 -; X86-NOSSE-NEXT: movw %ax, (%esp) -; X86-NOSSE-NEXT: fldcw (%esp) -; X86-NOSSE-NEXT: popl %eax -; X86-NOSSE-NEXT: retl -; -; X86-SSE-LABEL: func_03: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: fnstcw (%esp) -; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X86-SSE-NEXT: andl (%esp), %eax -; X86-SSE-NEXT: orl $2048, %eax # imm = 0x800 -; X86-SSE-NEXT: movw %ax, (%esp) -; X86-SSE-NEXT: fldcw (%esp) -; X86-SSE-NEXT: stmxcsr (%esp) -; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF -; X86-SSE-NEXT: andl (%esp), %eax -; X86-SSE-NEXT: orl $16384, %eax # imm = 0x4000 -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: ldmxcsr (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl -; -; X64-LABEL: func_03: -; X64: # %bb.0: -; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: orl $2048, %eax # imm = 0x800 -; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: orl $16384, %eax # imm = 0x4000 -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: retq - call void @llvm.set.rounding(i32 2) ; Upward (CW[11-10] = 10) - ret void -} - -define void @func_04() nounwind { -; X86-NOSSE-LABEL: 
func_04: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: fnstcw (%esp) -; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X86-NOSSE-NEXT: andl (%esp), %eax -; X86-NOSSE-NEXT: orl $1024, %eax # imm = 0x400 -; X86-NOSSE-NEXT: movw %ax, (%esp) -; X86-NOSSE-NEXT: fldcw (%esp) -; X86-NOSSE-NEXT: popl %eax -; X86-NOSSE-NEXT: retl -; -; X86-SSE-LABEL: func_04: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: fnstcw (%esp) -; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X86-SSE-NEXT: andl (%esp), %eax -; X86-SSE-NEXT: orl $1024, %eax # imm = 0x400 -; X86-SSE-NEXT: movw %ax, (%esp) -; X86-SSE-NEXT: fldcw (%esp) -; X86-SSE-NEXT: stmxcsr (%esp) -; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF -; X86-SSE-NEXT: andl (%esp), %eax -; X86-SSE-NEXT: orl $8192, %eax # imm = 0x2000 -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: ldmxcsr (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl -; -; X64-LABEL: func_04: -; X64: # %bb.0: -; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: orl $1024, %eax # imm = 0x400 -; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: orl $8192, %eax # imm = 0x2000 -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: retq - call void @llvm.set.rounding(i32 3) ; Downward (CW[11-10] = 01) - ret void -} - -define void @func_05(i32 %x) nounwind { -; X86-NOSSE-LABEL: func_05: -; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: leal 4(%eax,%eax), %ecx -; X86-NOSSE-NEXT: movl $201, %eax -; X86-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NOSSE-NEXT: shll %cl, %eax -; X86-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00 -; X86-NOSSE-NEXT: 
fnstcw (%esp) -; X86-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF -; X86-NOSSE-NEXT: andl (%esp), %ecx -; X86-NOSSE-NEXT: orl %eax, %ecx -; X86-NOSSE-NEXT: movw %cx, (%esp) -; X86-NOSSE-NEXT: fldcw (%esp) -; X86-NOSSE-NEXT: popl %eax -; X86-NOSSE-NEXT: retl -; -; X86-SSE-LABEL: func_05: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: leal 4(%eax,%eax), %ecx -; X86-SSE-NEXT: movl $201, %eax -; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SSE-NEXT: shll %cl, %eax -; X86-SSE-NEXT: andl $3072, %eax # imm = 0xC00 -; X86-SSE-NEXT: fnstcw (%esp) -; X86-SSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF -; X86-SSE-NEXT: andl (%esp), %ecx -; X86-SSE-NEXT: orl %eax, %ecx -; X86-SSE-NEXT: movw %cx, (%esp) -; X86-SSE-NEXT: fldcw (%esp) -; X86-SSE-NEXT: stmxcsr (%esp) -; X86-SSE-NEXT: movl $-24577, %ecx # imm = 0x9FFF -; X86-SSE-NEXT: andl (%esp), %ecx -; X86-SSE-NEXT: leal (%ecx,%eax,8), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: ldmxcsr (%esp) -; X86-SSE-NEXT: popl %eax -; X86-SSE-NEXT: retl -; -; X64-LABEL: func_05: -; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal 4(%rdi,%rdi), %ecx -; X64-NEXT: movl $201, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shll %cl, %eax -; X64-NEXT: andl $3072, %eax # imm = 0xC00 -; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-3073, %ecx # imm = 0xF3FF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp) -; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: movl $-24577, %ecx # imm = 0x9FFF -; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx -; X64-NEXT: leal (%rcx,%rax,8), %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; X64-NEXT: retq - call void @llvm.set.rounding(i32 %x) ; Downward - ret void -} - define void @get_fpenv_01(ptr %ptr) #0 { ; X86-NOSSE-LABEL: 
get_fpenv_01: ; X86-NOSSE: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll index 3196f8177cc9..38e3e23f7caa 100644 --- a/llvm/test/CodeGen/X86/freeze.ll +++ b/llvm/test/CodeGen/X86/freeze.ll @@ -141,3 +141,48 @@ entry: %z = urem i32 %y, 10 ret i32 %z } + +; Make sure we don't crash when replacing all uses of N with an existing freeze N. + +define i64 @pr155345(ptr %p1, i1 %cond, ptr %p2, ptr %p3) { +; X86ASM-LABEL: pr155345: +; X86ASM: # %bb.0: # %entry +; X86ASM-NEXT: movzbl (%rdi), %edi +; X86ASM-NEXT: xorl %eax, %eax +; X86ASM-NEXT: orb $1, %dil +; X86ASM-NEXT: movb %dil, (%rdx) +; X86ASM-NEXT: movzbl %dil, %edx +; X86ASM-NEXT: cmovel %edx, %eax +; X86ASM-NEXT: sete %dil +; X86ASM-NEXT: testb $1, %sil +; X86ASM-NEXT: cmovnel %edx, %eax +; X86ASM-NEXT: movb %dl, (%rcx) +; X86ASM-NEXT: movl $1, %edx +; X86ASM-NEXT: movl %eax, %ecx +; X86ASM-NEXT: shlq %cl, %rdx +; X86ASM-NEXT: orb %sil, %dil +; X86ASM-NEXT: movzbl %dil, %eax +; X86ASM-NEXT: andl %edx, %eax +; X86ASM-NEXT: andl $1, %eax +; X86ASM-NEXT: retq +entry: + %load1 = load i8, ptr %p1, align 1 + %v1 = or i8 %load1, 1 + %v2 = zext i8 %v1 to i32 + store i8 %v1, ptr %p2, align 1 + %v3 = load i8, ptr %p2, align 1 + %ext1 = sext i8 %v3 to i64 + %ext2 = zext i32 %v2 to i64 + %cmp1 = icmp ult i64 0, %ext1 + %v4 = select i1 %cond, i1 false, i1 %cmp1 + %sel1 = select i1 %v4, i64 0, i64 %ext2 + %shl = shl i64 1, %sel1 + store i8 %v1, ptr %p3, align 1 + %v5 = load i8, ptr %p3, align 1 + %ext3 = sext i8 %v5 to i64 + %cmp2 = icmp ult i64 0, %ext3 + %v6 = select i1 %cond, i1 false, i1 %cmp2 + %sel2 = select i1 %v6, i64 0, i64 1 + %and = and i64 %sel2, %shl + ret i64 %and +} diff --git a/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll b/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll new file mode 100644 index 000000000000..aebfc7d483d6 --- /dev/null +++ b/llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll @@ -0,0 +1,580 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avxifma | FileCheck %s --check-prefixes=X64,AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma | FileCheck %s --check-prefixes=X64,AVX512,AVX512-NOVL +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=X64,AVX512,AVX512VL + +; 67108863 == (1 << 26) - 1 +; 4503599627370496 == (1 << 52) +; 4503599627370495 == (1 << 52) - 1 + +define <8 x i64> @test_512_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; AVX-LABEL: test_512_combine: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2 +; AVX-NEXT: vpand %ymm6, %ymm0, %ymm0 +; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm0, %ymm4 +; AVX-NEXT: vpand %ymm6, %ymm3, %ymm0 +; AVX-NEXT: vpand %ymm6, %ymm1, %ymm1 +; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm1, %ymm5 +; AVX-NEXT: vmovdqa %ymm4, %ymm0 +; AVX-NEXT: vmovdqa %ymm5, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_512_combine: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863] +; AVX512-NEXT: vpandq %zmm3, %zmm0, %zmm0 +; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512-NEXT: retq + %x_masked = and <8 x i64> %x, splat (i64 67108863) + %y_masked = and <8 x i64> %y, splat (i64 67108863) + %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked + %res = add nuw nsw <8 x i64> %mul, %z + ret <8 x i64> %res +} + +define <8 x i64> @test_512_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; AVX-LABEL: test_512_combine_v2: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [3,3,3,3] +; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm7 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623] +; AVX-NEXT: vpand %ymm7, %ymm0, %ymm0 
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm0, %ymm4 +; AVX-NEXT: vpand %ymm6, %ymm3, %ymm0 +; AVX-NEXT: vpand %ymm7, %ymm1, %ymm1 +; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm1, %ymm5 +; AVX-NEXT: vmovdqa %ymm4, %ymm0 +; AVX-NEXT: vmovdqa %ymm5, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_512_combine_v2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 +; AVX512-NEXT: vpmadd52luq %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512-NEXT: retq + %x_masked = and <8 x i64> %x, splat (i64 1125899906842623) ; (1 << 50) - 1 + %y_masked = and <8 x i64> %y, splat (i64 3) + %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked + %res = add nuw nsw <8 x i64> %mul, %z + ret <8 x i64> %res +} + +define <8 x i64> @test_512_no_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; AVX-LABEL: test_512_no_combine: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495] +; AVX-NEXT: vpand %ymm6, %ymm0, %ymm7 +; AVX-NEXT: vpand %ymm6, %ymm1, %ymm8 +; AVX-NEXT: vpand %ymm6, %ymm2, %ymm9 +; AVX-NEXT: vpand %ymm6, %ymm3, %ymm6 +; AVX-NEXT: vpsrlq $32, %ymm8, %ymm8 +; AVX-NEXT: vpmuludq %ymm3, %ymm8, %ymm8 +; AVX-NEXT: vpsrlq $32, %ymm6, %ymm6 +; AVX-NEXT: vpmuludq %ymm6, %ymm1, %ymm6 +; AVX-NEXT: vpaddq %ymm6, %ymm8, %ymm6 +; AVX-NEXT: vpsllq $32, %ymm6, %ymm6 +; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 +; AVX-NEXT: vpsrlq $32, %ymm7, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3 +; AVX-NEXT: vpsrlq $32, %ymm9, %ymm7 +; AVX-NEXT: vpmuludq %ymm7, %ymm0, %ymm7 +; AVX-NEXT: vpaddq %ymm3, %ymm7, %ymm3 +; AVX-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm4, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm5, %ymm1, %ymm1 +; AVX-NEXT: vpaddq %ymm6, %ymm1, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: 
test_512_no_combine: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495,4503599627370495] +; AVX512-NEXT: vpandq %zmm3, %zmm0, %zmm4 +; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm3 +; AVX512-NEXT: vpsrlq $32, %zmm4, %zmm4 +; AVX512-NEXT: vpmuludq %zmm1, %zmm4, %zmm4 +; AVX512-NEXT: vpsrlq $32, %zmm3, %zmm3 +; AVX512-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 +; AVX512-NEXT: vpaddq %zmm4, %zmm3, %zmm3 +; AVX512-NEXT: vpsllq $32, %zmm3, %zmm3 +; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm3, %zmm0, %zmm0 +; AVX512-NEXT: retq + %x_masked = and <8 x i64> %x, splat (i64 4503599627370495) + %y_masked = and <8 x i64> %y, splat (i64 4503599627370495) + %mul = mul nuw nsw <8 x i64> %x_masked, %y_masked + %res = add nuw nsw <8 x i64> %mul, %z + ret <8 x i64> %res +} + +define <8 x i64> @test_512_no_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; AVX-LABEL: test_512_no_combine_v2: +; AVX: # %bb.0: +; AVX-NEXT: vpsrlq $32, %ymm1, %ymm6 +; AVX-NEXT: vpmuludq %ymm3, %ymm6, %ymm6 +; AVX-NEXT: vpsrlq $32, %ymm3, %ymm7 +; AVX-NEXT: vpmuludq %ymm7, %ymm1, %ymm7 +; AVX-NEXT: vpaddq %ymm6, %ymm7, %ymm6 +; AVX-NEXT: vpsllq $32, %ymm6, %ymm6 +; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 +; AVX-NEXT: vpsrlq $32, %ymm0, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3 +; AVX-NEXT: vpsrlq $32, %ymm2, %ymm7 +; AVX-NEXT: vpmuludq %ymm7, %ymm0, %ymm7 +; AVX-NEXT: vpaddq %ymm3, %ymm7, %ymm3 +; AVX-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm4, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm5, %ymm1, %ymm1 +; AVX-NEXT: vpaddq %ymm6, %ymm1, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_512_no_combine_v2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3 +; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3 +; 
AVX512-NEXT: vpsrlq $32, %zmm1, %zmm4 +; AVX512-NEXT: vpmuludq %zmm4, %zmm0, %zmm4 +; AVX512-NEXT: vpaddq %zmm3, %zmm4, %zmm3 +; AVX512-NEXT: vpsllq $32, %zmm3, %zmm3 +; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm3, %zmm0, %zmm0 +; AVX512-NEXT: retq + %mul = mul <8 x i64> %x, %y + %res = add <8 x i64> %mul, %z + ret <8 x i64> %res +} + +define <4 x i64> @test_256_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { +; AVX-LABEL: test_256_combine: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm2 +; AVX-NEXT: vmovdqa %ymm2, %ymm0 +; AVX-NEXT: retq +; +; AVX512-NOVL-LABEL: test_256_combine: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863] +; AVX512-NOVL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512-NOVL-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512-NOVL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 +; AVX512-NOVL-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; AVX512-NOVL-NEXT: retq +; +; AVX512VL-LABEL: test_256_combine: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863] +; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 +; AVX512VL-NEXT: retq + %x_masked = and <4 x i64> %x, splat(i64 67108863) + %y_masked = and <4 x i64> %y, splat(i64 67108863) + %mul = mul nuw nsw <4 x i64> %x_masked, %y_masked + %res = add nuw nsw <4 x i64> %z, %mul + ret <4 x i64> %res +} + +define <4 x i64> @test_256_no_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { +; X64-LABEL: test_256_no_combine: +; X64: # %bb.0: +; X64-NEXT: vpsrlq $32, %ymm0, %ymm3 +; X64-NEXT: vpmuludq %ymm1, %ymm3, %ymm3 +; X64-NEXT: vpsrlq $32, %ymm1, %ymm4 +; 
X64-NEXT: vpmuludq %ymm4, %ymm0, %ymm4 +; X64-NEXT: vpaddq %ymm3, %ymm4, %ymm3 +; X64-NEXT: vpsllq $32, %ymm3, %ymm3 +; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; X64-NEXT: retq + %mul = mul <4 x i64> %x, %y + %res = add <4 x i64> %mul, %z + ret <4 x i64> %res +} + +define <2 x i64> @test_128_combine(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; AVX-LABEL: test_128_combine: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863] +; AVX-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX-NEXT: vmovdqa %xmm2, %xmm0 +; AVX-NEXT: retq +; +; AVX512-NOVL-LABEL: test_128_combine: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863] +; AVX512-NOVL-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX512-NOVL-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX512-NOVL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 +; AVX512-NOVL-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX512-NOVL-NEXT: retq +; +; AVX512VL-LABEL: test_128_combine: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863] +; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2 +; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0 +; AVX512VL-NEXT: retq + %x_masked = and <2 x i64> %x, splat (i64 67108863) + %y_masked = and <2 x i64> %y, splat (i64 67108863) + %mul = mul <2 x i64> %x_masked, %y_masked + %res = add <2 x i64> %z, %mul + ret <2 x i64> %res +} + +; Sanity check we're not applying this here +define <1 x i64> @test_scalar_no_ifma(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) { +; X64-LABEL: test_scalar_no_ifma: +; X64: # %bb.0: +; X64-NEXT: imulq %rsi, %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax +; X64-NEXT: retq + %mul = mul <1 x i64> %x, %y + %res = add <1 x i64> %mul, %z + ret <1 x i64> %res +} + +; 40-bit and 13-bit, too wide +define 
<8 x i64> @test_mixed_width_too_wide(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) { +; AVX-LABEL: test_mixed_width_too_wide: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [8191,8191,8191,8191] +; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2 +; AVX-NEXT: vpand %ymm6, %ymm3, %ymm3 +; AVX-NEXT: vpmovzxdq {{.*#+}} ymm6 = [2155905028,2155905036,2155905044,2155905052] +; AVX-NEXT: vpshufb %ymm6, %ymm1, %ymm7 +; AVX-NEXT: vpmuludq %ymm3, %ymm7, %ymm7 +; AVX-NEXT: vpsllq $32, %ymm7, %ymm7 +; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 +; AVX-NEXT: vpshufb %ymm6, %ymm0, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3 +; AVX-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm0, %ymm4, %ymm0 +; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm1, %ymm5, %ymm1 +; AVX-NEXT: vpaddq %ymm7, %ymm1, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_mixed_width_too_wide: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 +; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm3 +; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm0 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpsllq $32, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm1 +; AVX512-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: retq + %x40 = and <8 x i64> %x, splat (i64 1099511627775) + %y13 = and <8 x i64> %y, splat (i64 8191) + %mul = mul <8 x i64> %x40, %y13 + %res = add <8 x i64> %z, %mul + ret <8 x i64> %res +} + +define <8 x i64> @test_zext32_inputs_not_safe(<8 x i32> %xi32, <8 x i32> %yi32, <8 x i64> %z) { +; AVX-LABEL: test_zext32_inputs_not_safe: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxdq {{.*#+}} ymm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vpmovzxdq {{.*#+}} ymm5 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX-NEXT: vpmuludq %ymm5, %ymm4, %ymm4 +; AVX-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1 +; AVX-NEXT: vpaddq %ymm4, %ymm2, %ymm0 +; AVX-NEXT: vpaddq %ymm1, %ymm3, %ymm1 +; AVX-NEXT: retq +; +; AVX512-LABEL: test_zext32_inputs_not_safe: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero +; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: retq + %x = zext <8 x i32> %xi32 to <8 x i64> + %y = zext <8 x i32> %yi32 to <8 x i64> + %mul = mul <8 x i64> %x, %y + %res = add <8 x i64> %z, %mul + ret <8 x i64> %res +} + +define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i64> %z) nounwind { +; AVX-LABEL: test_1024_combine_split: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $32, %rsp +; AVX-NEXT: vmovdqa 112(%rbp), %ymm8 +; AVX-NEXT: vmovdqa 80(%rbp), %ymm9 +; AVX-NEXT: vmovdqa 48(%rbp), %ymm10 +; AVX-NEXT: vmovdqa 16(%rbp), %ymm11 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm12 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm3, %ymm12, %ymm3 +; AVX-NEXT: vpand %ymm2, %ymm12, %ymm2 +; AVX-NEXT: vpand %ymm1, %ymm12, %ymm1 +; AVX-NEXT: vpand %ymm0, %ymm12, %ymm0 +; AVX-NEXT: vpand %ymm7, %ymm12, %ymm7 +; AVX-NEXT: {vex} vpmadd52luq %ymm7, %ymm3, %ymm8 +; AVX-NEXT: vpand %ymm6, %ymm12, %ymm3 +; AVX-NEXT: {vex} vpmadd52luq %ymm3, %ymm2, %ymm9 +; AVX-NEXT: vpand %ymm5, %ymm12, %ymm2 +; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm10 +; AVX-NEXT: vpand %ymm4, %ymm12, %ymm1 +; AVX-NEXT: {vex} 
vpmadd52luq %ymm1, %ymm0, %ymm11 +; AVX-NEXT: vmovdqa %ymm11, %ymm0 +; AVX-NEXT: vmovdqa %ymm10, %ymm1 +; AVX-NEXT: vmovdqa %ymm9, %ymm2 +; AVX-NEXT: vmovdqa %ymm8, %ymm3 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq +; +; AVX512-LABEL: test_1024_combine_split: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm6 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863] +; AVX512-NEXT: vpandq %zmm6, %zmm2, %zmm2 +; AVX512-NEXT: vpandq %zmm6, %zmm0, %zmm0 +; AVX512-NEXT: vpmadd52luq %zmm2, %zmm0, %zmm4 +; AVX512-NEXT: vpandq %zmm6, %zmm3, %zmm0 +; AVX512-NEXT: vpandq %zmm6, %zmm1, %zmm1 +; AVX512-NEXT: vpmadd52luq %zmm0, %zmm1, %zmm5 +; AVX512-NEXT: vmovdqa64 %zmm4, %zmm0 +; AVX512-NEXT: vmovdqa64 %zmm5, %zmm1 +; AVX512-NEXT: retq + %x_masked = and <16 x i64> %x, splat (i64 67108863) + %y_masked = and <16 x i64> %y, splat (i64 67108863) + %mul = mul <16 x i64> %x_masked, %y_masked + %res = add <16 x i64> %z, %mul + ret <16 x i64> %res +} + +define <1 x i64> @test_not_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) { +; X64-LABEL: test_not_v1i64: +; X64: # %bb.0: +; X64-NEXT: andl $67108863, %edi # imm = 0x3FFFFFF +; X64-NEXT: imulq %rdi, %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax +; X64-NEXT: retq + %x_masked = and <1 x i64> %x, splat (i64 67108863) + %y_masked = and <1 x i64> %x, splat (i64 67108863) + %mul = mul <1 x i64> %x_masked, %y_masked + %res = add <1 x i64> %mul, %z + ret <1 x i64> %res +} + +define <3 x i64> @test_v3i64(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) { +; AVX-LABEL: test_v3i64: +; AVX: # %bb.0: +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmuludq %ymm0, %ymm0, %ymm0 +; AVX-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX-NEXT: retq +; +; AVX512-NOVL-LABEL: test_v3i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863] +; AVX512-NOVL-NEXT: vpand %ymm1, 
%ymm0, %ymm0 +; AVX512-NOVL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0 +; AVX512-NOVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX512-NOVL-NEXT: retq +; +; AVX512VL-LABEL: test_v3i64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: retq + %x_masked = and <3 x i64> %x, splat (i64 67108863) + %y_masked = and <3 x i64> %x, splat (i64 67108863) + %mul = mul <3 x i64> %x_masked, %y_masked + %res = add <3 x i64> %mul, %z + ret <3 x i64> %res +} + +define <5 x i64> @test_v5i64(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) { +; AVX-LABEL: test_v5i64: +; AVX: # %bb.0: +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: vmovq %r8, %xmm0 +; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovq %rdx, %xmm1 +; AVX-NEXT: vmovq %rsi, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm2 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF +; AVX-NEXT: vmovq %rcx, %xmm3 +; AVX-NEXT: vmovq %r9, %xmm4 +; AVX-NEXT: vpand %xmm3, %xmm4, %xmm3 +; AVX-NEXT: vpsrlq $32, %xmm3, %xmm4 +; AVX-NEXT: vpmuludq %xmm4, %xmm3, %xmm4 +; AVX-NEXT: vpsllq $33, %xmm4, %xmm4 +; AVX-NEXT: vpmuludq %xmm3, %xmm3, %xmm3 +; AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1 +; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm2 +; AVX-NEXT: vmovdqa %ymm2, (%rdi) +; AVX-NEXT: vmovq %xmm1, 32(%rdi) +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v5i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm2, %zmm0, 
%zmm0 +; AVX512-NEXT: retq + %x_masked = and <5 x i64> %x, splat (i64 67108863) + %y_masked = and <5 x i64> %x, splat (i64 67108863) + %mul = mul <5 x i64> %x_masked, %y_masked + %res = add <5 x i64> %mul, %z + ret <5 x i64> %res +} + +define <6 x i64> @test_v6i64(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) { +; AVX-LABEL: test_v6i64: +; AVX: # %bb.0: +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: vmovq %r8, %xmm0 +; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovq %rdx, %xmm1 +; AVX-NEXT: vmovq %rsi, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm1 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm1 +; AVX-NEXT: vmovq %r9, %xmm0 +; AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpmuldq %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpaddq {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; AVX-NEXT: vmovdqa %xmm0, 32(%rdi) +; AVX-NEXT: vmovdqa %ymm1, (%rdi) +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v6i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0 +; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512-NEXT: retq + %x_masked = and <6 x i64> %x, splat (i64 67108863) + %y_masked = and <6 x i64> %x, splat (i64 67108863) + %mul = mul <6 x i64> %x_masked, %y_masked + %res = add <6 x i64> %mul, %z + ret <6 x i64> %res +} + +define <9 x i64> @test_v9i64(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) { +; AVX-LABEL: test_v9i64: +; AVX: # %bb.0: +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: vmovq %r8, %xmm0 +; AVX-NEXT: vmovq %rcx, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovq %rdx, %xmm1 
+; AVX-NEXT: vmovq %rsi, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vmovq %r9, %xmm1 +; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1 +; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm3 +; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm4 +; AVX-NEXT: vpbroadcastq {{.*#+}} ymm5 = [67108863,67108863,67108863,67108863] +; AVX-NEXT: vpand %ymm5, %ymm0, %ymm0 +; AVX-NEXT: vpand %ymm5, %ymm1, %ymm1 +; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF +; AVX-NEXT: vmovq %rcx, %xmm5 +; AVX-NEXT: vmovq {{.*#+}} xmm6 = mem[0],zero +; AVX-NEXT: vpand %xmm5, %xmm6, %xmm5 +; AVX-NEXT: vpsrlq $32, %xmm5, %xmm6 +; AVX-NEXT: vpmuludq %xmm6, %xmm5, %xmm6 +; AVX-NEXT: vpsllq $33, %xmm6, %xmm6 +; AVX-NEXT: vpmuludq %xmm5, %xmm5, %xmm5 +; AVX-NEXT: vpaddq %xmm2, %xmm5, %xmm2 +; AVX-NEXT: vpaddq %xmm6, %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm4 +; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm1, %ymm3 +; AVX-NEXT: vmovdqa %ymm3, 32(%rdi) +; AVX-NEXT: vmovdqa %ymm4, (%rdi) +; AVX-NEXT: vmovq %xmm2, 64(%rdi) +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v9i64: +; AVX512: # %bb.0: +; AVX512-NEXT: movq %rdi, %rax +; AVX512-NEXT: vmovq %r8, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm1 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512-NEXT: vmovq %rdx, %xmm1 +; AVX512-NEXT: vmovq %rsi, %xmm2 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vmovq %r9, %xmm1 +; AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1 +; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; 
AVX512-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm2 +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF +; AVX512-NEXT: vmovq %rcx, %xmm3 +; AVX512-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero +; AVX512-NEXT: vpand %xmm3, %xmm4, %xmm3 +; AVX512-NEXT: vpsrlq $32, %xmm3, %xmm4 +; AVX512-NEXT: vpmuludq %xmm4, %xmm3, %xmm4 +; AVX512-NEXT: vpsllq $33, %xmm4, %xmm4 +; AVX512-NEXT: vpmuludq %xmm3, %xmm3, %xmm3 +; AVX512-NEXT: vpaddq %xmm1, %xmm3, %xmm1 +; AVX512-NEXT: vpaddq %xmm4, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52luq %zmm0, %zmm0, %zmm2 +; AVX512-NEXT: vmovq %xmm1, 64(%rdi) +; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %x_masked = and <9 x i64> %x, splat (i64 67108863) + %y_masked = and <9 x i64> %x, splat (i64 67108863) + %mul = mul <9 x i64> %x_masked, %y_masked + %res = add <9 x i64> %mul, %z + ret <9 x i64> %res +} diff --git a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll index 57dccfc1b4a8..0538541a6f7b 100644 --- a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll +++ b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll @@ -18,9 +18,9 @@ define i64 @t(ptr %arg) nounwind { ret i64 0 } -; Make sure that we translate this to the bswap intrinsic which lowers down without the -; inline assembly. -; CHECK-NOT: #APP +; Make sure this lowers to inline assembly and is not translated to an +; intrinsic. 
+; CHECK: #APP define i32 @s(i32 %argc, ptr nocapture %argv) unnamed_addr nounwind { entry: %0 = trunc i32 %argc to i16 diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 3ac0fd7746a3..eccb32346a40 100644 --- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -22,41 +22,45 @@ define void @FontChange(i1 %foo) nounwind { ; CHECK-LABEL: FontChange: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB0_10 +; CHECK-NEXT: je .LBB0_12 +; CHECK-NEXT: # %bb.1: # %bb298 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %bb304 +; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_1: # %bb366 +; CHECK-NEXT: .LBB0_3: # %bb366 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %bb428 +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: .LBB0_4: # %bb428 ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB0_10 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: je .LBB0_12 +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: cmpb $0, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_4: # %bb650 +; CHECK-NEXT: .LBB0_6: # %bb650 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: je .LBB0_4 -; CHECK-NEXT: # %bb.5: # %bb662 +; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: # %bb.7: # %bb662 ; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: andl $57344, %ecx # imm = 0xE000 ; CHECK-NEXT: cmpl $8192, %ecx # imm = 0x2000 -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.6: # %bb4884 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: # %bb.8: # %bb4884 ; CHECK-NEXT: andl $7168, %eax # imm = 0x1C00 ; CHECK-NEXT: cmpl $1024, %eax # imm = 0x400 -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.7: # %bb4932 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: # %bb.9: # %bb4932 ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.8: # %bb4940 +; CHECK-NEXT: jne 
.LBB0_12 +; CHECK-NEXT: # %bb.10: # %bb4940 ; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: cmpl $160, %eax -; CHECK-NEXT: je .LBB0_10 -; CHECK-NEXT: # %bb.9: # %bb4940 +; CHECK-NEXT: je .LBB0_12 +; CHECK-NEXT: # %bb.11: # %bb4940 ; CHECK-NEXT: cmpl $159, %eax -; CHECK-NEXT: .LBB0_10: # %bb4897 +; CHECK-NEXT: .LBB0_12: # %bb4897 ; CHECK-NEXT: retq entry: br i1 %foo, label %bb298, label %bb49 diff --git a/llvm/test/CodeGen/X86/isel-ceil.ll b/llvm/test/CodeGen/X86/isel-ceil.ll new file mode 100644 index 000000000000..c82cfebd4814 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-ceil.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define float @ceil_f32(float %a) nounwind readnone { +; DAG-X64-LABEL: ceil_f32: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp ceilf@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: ceil_f32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq ceilf@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: ceil_f32: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll ceilf +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: ceil_f32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp ceilf@PLT # TAILCALL + %c = call float @llvm.ceil.f32(float %a) + ret float %c +} + +define 
double @ceil_f64(double %a) nounwind readnone { +; DAG-X64-LABEL: ceil_f64: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp ceil@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: ceil_f64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq ceil@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: ceil_f64: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll ceil +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: ceil_f64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp ceil@PLT # TAILCALL + %c = call double @llvm.ceil.f64(double %a) + ret double %c +} + +define x86_fp80 @ceil_f80(x86_fp80 %a) nounwind readnone { +; X64-LABEL: ceil_f80: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: callq ceill@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +; +; X86-LABEL: ceil_f80: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: calll ceill +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: ceil_f80: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: subq $24, %rsp +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: fstpt (%rsp) +; GISEL-X64-NEXT: callq ceill@PLT +; GISEL-X64-NEXT: addq $24, %rsp +; GISEL-X64-NEXT: retq + %c = call x86_fp80 @llvm.ceil.f80(x86_fp80 %a) + ret x86_fp80 %c +} + diff --git a/llvm/test/CodeGen/X86/isel-floor.ll b/llvm/test/CodeGen/X86/isel-floor.ll new file mode 100644 index 000000000000..675925b61126 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-floor.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define float @floor_f32(float %a) nounwind readnone { +; DAG-X64-LABEL: floor_f32: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp floorf@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: floor_f32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq floorf@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: floor_f32: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll floorf +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: floor_f32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp floorf@PLT # TAILCALL + %c = call float @llvm.floor.f32(float %a) + ret float %c +} + +define double @floor_f64(double %a) nounwind readnone { +; DAG-X64-LABEL: floor_f64: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp floor@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: floor_f64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq floor@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: floor_f64: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll floor +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: floor_f64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp floor@PLT # TAILCALL + %c = call double @llvm.floor.f64(double %a) + ret double %c +} + +define x86_fp80 @floor_f80(x86_fp80 %a) nounwind readnone { +; X64-LABEL: floor_f80: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; 
X64-NEXT: fstpt (%rsp) +; X64-NEXT: callq floorl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +; +; X86-LABEL: floor_f80: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: calll floorl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: floor_f80: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: subq $24, %rsp +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: fstpt (%rsp) +; GISEL-X64-NEXT: callq floorl@PLT +; GISEL-X64-NEXT: addq $24, %rsp +; GISEL-X64-NEXT: retq + %c = call x86_fp80 @llvm.floor.f80(x86_fp80 %a) + ret x86_fp80 %c +} + diff --git a/llvm/test/CodeGen/X86/isel-ftrunc.ll b/llvm/test/CodeGen/X86/isel-ftrunc.ll new file mode 100644 index 000000000000..9bf06193961a --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-ftrunc.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 + +define float @trunc_f32(float %a) nounwind readnone { +; DAG-X64-LABEL: trunc_f32: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp truncf@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: trunc_f32: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq truncf@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: trunc_f32: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: flds {{[0-9]+}}(%esp) +; 
X86-NEXT: fstps (%esp) +; X86-NEXT: calll truncf +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: trunc_f32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp truncf@PLT # TAILCALL + %c = call float @llvm.trunc.f32(float %a) + ret float %c +} + +define double @trunc_f64(double %a) nounwind readnone { +; DAG-X64-LABEL: trunc_f64: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: jmp trunc@PLT # TAILCALL +; +; FASTISEL-X64-LABEL: trunc_f64: +; FASTISEL-X64: # %bb.0: +; FASTISEL-X64-NEXT: pushq %rax +; FASTISEL-X64-NEXT: callq trunc@PLT +; FASTISEL-X64-NEXT: popq %rax +; FASTISEL-X64-NEXT: retq +; +; X86-LABEL: trunc_f64: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll trunc +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: trunc_f64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: jmp trunc@PLT # TAILCALL + %c = call double @llvm.trunc.f64(double %a) + ret double %c +} + +define x86_fp80 @trunc_f80(x86_fp80 %a) nounwind readnone { +; X64-LABEL: trunc_f80: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: callq truncl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +; +; X86-LABEL: trunc_f80: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: calll truncl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; GISEL-X64-LABEL: trunc_f80: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: subq $24, %rsp +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: fstpt (%rsp) +; GISEL-X64-NEXT: callq truncl@PLT +; GISEL-X64-NEXT: addq $24, %rsp +; GISEL-X64-NEXT: retq + %c = call x86_fp80 @llvm.trunc.f80(x86_fp80 %a) + ret x86_fp80 %c +} + diff --git a/llvm/test/CodeGen/X86/llvm.acos.ll b/llvm/test/CodeGen/X86/isel-llvm.acos.ll index 9176cf47bda7..9176cf47bda7 100644 --- a/llvm/test/CodeGen/X86/llvm.acos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.acos.ll 
diff --git a/llvm/test/CodeGen/X86/llvm.asin.ll b/llvm/test/CodeGen/X86/isel-llvm.asin.ll index 87ffcc9c963c..87ffcc9c963c 100644 --- a/llvm/test/CodeGen/X86/llvm.asin.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.asin.ll diff --git a/llvm/test/CodeGen/X86/llvm.atan.ll b/llvm/test/CodeGen/X86/isel-llvm.atan.ll index c03361d18c1d..c03361d18c1d 100644 --- a/llvm/test/CodeGen/X86/llvm.atan.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.atan.ll diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/isel-llvm.atan2.ll index aa56068e1778..aa56068e1778 100644 --- a/llvm/test/CodeGen/X86/llvm.atan2.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.atan2.ll diff --git a/llvm/test/CodeGen/X86/llvm.cos.ll b/llvm/test/CodeGen/X86/isel-llvm.cos.ll index af039854d349..af039854d349 100644 --- a/llvm/test/CodeGen/X86/llvm.cos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.cos.ll diff --git a/llvm/test/CodeGen/X86/llvm.cosh.ll b/llvm/test/CodeGen/X86/isel-llvm.cosh.ll index a61867c11fd4..a61867c11fd4 100644 --- a/llvm/test/CodeGen/X86/llvm.cosh.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.cosh.ll diff --git a/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll b/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll new file mode 100644 index 000000000000..688add1e92ab --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-llvm.set.rounding.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,SDAG-X86-NOSSE +; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,FASTISEL-X86-NOSSE +; RUN: llc < %s -mtriple=i686-- -global-isel -global-isel-abort=2 -mattr=-sse | FileCheck %s --check-prefixes=X86-NOSSE,GISEL-X86-NOSSE +; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s --check-prefixes=X64-NOSSE,SDAG-X64-NOSSE +; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 -mattr=-sse | FileCheck %s 
--check-prefixes=X64-NOSSE,FASTISEL-X64-NOSSE +; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=2 -mattr=-sse | FileCheck %s --check-prefixes=X64-NOSSE,GISEL-X64-NOSSE +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86,SDAG-X86 +; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X86,FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86,GISEL-X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64,FASTISEL-X64 +; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64,GISEL-X64 + +declare void @llvm.set.rounding(i32 %x) + +define void @func_01() nounwind { +; X86-NOSSE-LABEL: func_01: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: fnstcw (%esp) +; X86-NOSSE-NEXT: orb $12, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldcw (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: retl +; +; X64-NOSSE-LABEL: func_01: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: orb $12, -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: retq +; +; X86-LABEL: func_01: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: fnstcw (%esp) +; X86-NEXT: orb $12, {{[0-9]+}}(%esp) +; X86-NEXT: fldcw (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: func_01: +; X64: # %bb.0: +; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NEXT: orb $12, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: orb $96, -{{[0-9]+}}(%rsp) +; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + call void @llvm.set.rounding(i32 0) ; TowardZero (CW[11-10] = 11) + ret void +} + +define void @func_02() nounwind { +; X86-NOSSE-LABEL: func_02: +; X86-NOSSE: # %bb.0: 
+; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: fnstcw (%esp) +; X86-NOSSE-NEXT: andb $-13, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fldcw (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: retl +; +; X64-NOSSE-LABEL: func_02: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: andb $-13, -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: retq +; +; X86-LABEL: func_02: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: fnstcw (%esp) +; X86-NEXT: andb $-13, {{[0-9]+}}(%esp) +; X86-NEXT: fldcw (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: func_02: +; X64: # %bb.0: +; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NEXT: andb $-13, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: andb $-97, -{{[0-9]+}}(%rsp) +; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + call void @llvm.set.rounding(i32 1) ; ToNearestTiesToEven (CW[11-10] = 00) + ret void +} + +define void @func_03() nounwind { +; X86-NOSSE-LABEL: func_03: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: fnstcw (%esp) +; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X86-NOSSE-NEXT: andl (%esp), %eax +; X86-NOSSE-NEXT: orl $2048, %eax # imm = 0x800 +; X86-NOSSE-NEXT: movw %ax, (%esp) +; X86-NOSSE-NEXT: fldcw (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: retl +; +; X64-NOSSE-LABEL: func_03: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NOSSE-NEXT: orl $2048, %eax # imm = 0x800 +; X64-NOSSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: retq +; +; X86-LABEL: func_03: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: fnstcw (%esp) +; X86-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X86-NEXT: andl (%esp), %eax +; X86-NEXT: orl $2048, %eax # imm = 0x800 +; X86-NEXT: 
movw %ax, (%esp) +; X86-NEXT: fldcw (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: func_03: +; X64: # %bb.0: +; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: orl $2048, %eax # imm = 0x800 +; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: orl $16384, %eax # imm = 0x4000 +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + call void @llvm.set.rounding(i32 2) ; Upward (CW[11-10] = 10) + ret void +} + +define void @func_04() nounwind { +; X86-NOSSE-LABEL: func_04: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: fnstcw (%esp) +; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X86-NOSSE-NEXT: andl (%esp), %eax +; X86-NOSSE-NEXT: orl $1024, %eax # imm = 0x400 +; X86-NOSSE-NEXT: movw %ax, (%esp) +; X86-NOSSE-NEXT: fldcw (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: retl +; +; X64-NOSSE-LABEL: func_04: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NOSSE-NEXT: orl $1024, %eax # imm = 0x400 +; X64-NOSSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: retq +; +; X86-LABEL: func_04: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: fnstcw (%esp) +; X86-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X86-NEXT: andl (%esp), %eax +; X86-NEXT: orl $1024, %eax # imm = 0x400 +; X86-NEXT: movw %ax, (%esp) +; X86-NEXT: fldcw (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: func_04: +; X64: # %bb.0: +; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: orl 
$1024, %eax # imm = 0x400 +; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: orl $8192, %eax # imm = 0x2000 +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + call void @llvm.set.rounding(i32 3) ; Downward (CW[11-10] = 01) + ret void +} + +define void @func_05(i32 %x) nounwind { +; X86-NOSSE-LABEL: func_05: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: leal 4(%eax,%eax), %ecx +; X86-NOSSE-NEXT: movl $201, %eax +; X86-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOSSE-NEXT: shll %cl, %eax +; X86-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00 +; X86-NOSSE-NEXT: fnstcw (%esp) +; X86-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF +; X86-NOSSE-NEXT: andl (%esp), %ecx +; X86-NOSSE-NEXT: orl %eax, %ecx +; X86-NOSSE-NEXT: movw %cx, (%esp) +; X86-NOSSE-NEXT: fldcw (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: retl +; +; X64-NOSSE-LABEL: func_05: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NOSSE-NEXT: leal 4(%rdi,%rdi), %ecx +; X64-NOSSE-NEXT: movl $201, %eax +; X64-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOSSE-NEXT: shll %cl, %eax +; X64-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00 +; X64-NOSSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF +; X64-NOSSE-NEXT: andl -{{[0-9]+}}(%rsp), %ecx +; X64-NOSSE-NEXT: orl %eax, %ecx +; X64-NOSSE-NEXT: movw %cx, -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NOSSE-NEXT: retq +; +; X86-LABEL: func_05: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 4(%eax,%eax), %ecx +; X86-NEXT: movl $201, %eax +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, 
%eax +; X86-NEXT: andl $3072, %eax # imm = 0xC00 +; X86-NEXT: fnstcw (%esp) +; X86-NEXT: movl $-3073, %ecx # imm = 0xF3FF +; X86-NEXT: andl (%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movw %cx, (%esp) +; X86-NEXT: fldcw (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: func_05: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 4(%rdi,%rdi), %ecx +; X64-NEXT: movl $201, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax +; X64-NEXT: andl $3072, %eax # imm = 0xC00 +; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-3073, %ecx # imm = 0xF3FF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $-24577, %ecx # imm = 0x9FFF +; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: leal (%rcx,%rax,8), %eax +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + call void @llvm.set.rounding(i32 %x) ; Downward + ret void +} + +attributes #0 = { nounwind "use-soft-float"="true" } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; FASTISEL-X64: {{.*}} +; FASTISEL-X64-NOSSE: {{.*}} +; FASTISEL-X86: {{.*}} +; FASTISEL-X86-NOSSE: {{.*}} +; GISEL-X64: {{.*}} +; GISEL-X64-NOSSE: {{.*}} +; GISEL-X86: {{.*}} +; GISEL-X86-NOSSE: {{.*}} +; SDAG-X64: {{.*}} +; SDAG-X64-NOSSE: {{.*}} +; SDAG-X86: {{.*}} +; SDAG-X86-NOSSE: {{.*}} diff --git a/llvm/test/CodeGen/X86/llvm.sin.ll b/llvm/test/CodeGen/X86/isel-llvm.sin.ll index 0f17f83d0102..0f17f83d0102 100644 --- a/llvm/test/CodeGen/X86/llvm.sin.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sin.ll diff --git a/llvm/test/CodeGen/X86/llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll index 065710f91457..065710f91457 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll diff --git a/llvm/test/CodeGen/X86/llvm.sinh.ll b/llvm/test/CodeGen/X86/isel-llvm.sinh.ll index ef30f8de0695..ef30f8de0695 100644 --- a/llvm/test/CodeGen/X86/llvm.sinh.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sinh.ll diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/isel-llvm.tan.ll index 4e76653cd129..4e76653cd129 100644 --- a/llvm/test/CodeGen/X86/llvm.tan.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.tan.ll diff --git a/llvm/test/CodeGen/X86/llvm.tanh.ll b/llvm/test/CodeGen/X86/isel-llvm.tanh.ll index c4f6e2f179cf..c4f6e2f179cf 100644 --- a/llvm/test/CodeGen/X86/llvm.tanh.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.tanh.ll diff --git a/llvm/test/CodeGen/X86/kmov.ll b/llvm/test/CodeGen/X86/kmov.ll index cab810d30cd7..8b1e69a97d54 100644 --- a/llvm/test/CodeGen/X86/kmov.ll +++ b/llvm/test/CodeGen/X86/kmov.ll @@ -143,6 +143,57 @@ define <8 x i1> @invert_i8_mask_extract_8(i8 %mask) { ret <8 x i1> %cmp.45 } +define <8 x i1> @i8_mask_extract_7(i8 %mask) { +; X64-AVX512-LABEL: i8_mask_extract_7: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: shrb %dil +; X64-AVX512-NEXT: movzbl %dil, %eax +; X64-AVX512-NEXT: kmovd %eax, %k0 +; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0 +; X64-AVX512-NEXT: retq +; +; 
X64-KNL-LABEL: i8_mask_extract_7: +; X64-KNL: # %bb.0: +; X64-KNL-NEXT: vmovd %edi, %xmm0 +; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0 +; X64-KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,4,8,16,32,64,128,0,2,4,8,16,32,64,128,0] +; X64-KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; X64-KNL-NEXT: retq + %.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0 + %.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer + %1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison> + %cmp.45 = icmp ne <8 x i8> %1, zeroinitializer + ret <8 x i1> %cmp.45 +} + +define <8 x i1> @invert_i8_mask_extract_7(i8 %mask) { +; X64-AVX512-LABEL: invert_i8_mask_extract_7: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: shrb %dil +; X64-AVX512-NEXT: movzbl %dil, %eax +; X64-AVX512-NEXT: kmovd %eax, %k0 +; X64-AVX512-NEXT: knotb %k0, %k0 +; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0 +; X64-AVX512-NEXT: retq +; +; X64-KNL-LABEL: invert_i8_mask_extract_7: +; X64-KNL: # %bb.0: +; X64-KNL-NEXT: vmovd %edi, %xmm0 +; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0 +; X64-KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; X64-KNL-NEXT: retq + %.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0 + %.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer + %1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison> + %cmp.45 = icmp eq <8 x i8> %1, zeroinitializer + ret <8 x i1> %cmp.45 +} + define <4 x i1> @i16_mask_extract_4(i16 %mask) { ; X64-AVX512-LABEL: 
i16_mask_extract_4: ; X64-AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll index 7bcf57311853..5f38645f7463 100644 --- a/llvm/test/CodeGen/X86/llrint-conv.ll +++ b/llvm/test/CodeGen/X86/llrint-conv.ll @@ -7,47 +7,15 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -define i64 @testmsxh(half %x) nounwind { -; X86-NOSSE-LABEL: testmsxh: -; X86-NOSSE: # %bb.0: # %entry -; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl %eax, (%esp) -; X86-NOSSE-NEXT: calll __extendhfsf2 -; X86-NOSSE-NEXT: fstps (%esp) -; X86-NOSSE-NEXT: calll llrintf -; X86-NOSSE-NEXT: popl %ecx -; X86-NOSSE-NEXT: retl -; -; X86-SSE2-LABEL: testmsxh: -; X86-SSE2: # %bb.0: # %entry -; X86-SSE2-NEXT: pushl %eax -; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 -; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax -; X86-SSE2-NEXT: movw %ax, (%esp) -; X86-SSE2-NEXT: calll __extendhfsf2 -; X86-SSE2-NEXT: fstps (%esp) -; X86-SSE2-NEXT: calll llrintf -; X86-SSE2-NEXT: popl %ecx -; X86-SSE2-NEXT: retl -; -; X64-SSE-LABEL: testmsxh: -; X64-SSE: # %bb.0: # %entry -; X64-SSE-NEXT: pushq %rax -; X64-SSE-NEXT: callq __extendhfsf2@PLT -; X64-SSE-NEXT: callq rintf@PLT -; X64-SSE-NEXT: callq __truncsfhf2@PLT -; X64-SSE-NEXT: callq __extendhfsf2@PLT -; X64-SSE-NEXT: cvttss2si %xmm0, %rax -; X64-SSE-NEXT: popq %rcx -; X64-SSE-NEXT: retq -entry: - %0 = tail call i64 @llvm.llrint.i64.f16(half %x) - ret i64 %0 -} +; FIXME: crash +; define i64 @test_llrint_i64_f16(half %x) nounwind { +; entry: +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) +; ret i64 %0 +; } -define i64 @testmsxs(float %x) nounwind { -; X86-NOSSE-LABEL: testmsxs: +define i64 @test_llrint_i64_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_llrint_i64_f32: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: 
pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp @@ -61,7 +29,7 @@ define i64 @testmsxs(float %x) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmsxs: +; X86-SSE2-LABEL: test_llrint_i64_f32: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp @@ -77,7 +45,7 @@ define i64 @testmsxs(float %x) nounwind { ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmsxs: +; X86-AVX-LABEL: test_llrint_i64_f32: ; X86-AVX: # %bb.0: # %entry ; X86-AVX-NEXT: pushl %ebp ; X86-AVX-NEXT: movl %esp, %ebp @@ -93,12 +61,12 @@ define i64 @testmsxs(float %x) nounwind { ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: testmsxs: +; X64-SSE-LABEL: test_llrint_i64_f32: ; X64-SSE: # %bb.0: # %entry ; X64-SSE-NEXT: cvtss2si %xmm0, %rax ; X64-SSE-NEXT: retq ; -; X64-AVX-LABEL: testmsxs: +; X64-AVX-LABEL: test_llrint_i64_f32: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: vcvtss2si %xmm0, %rax ; X64-AVX-NEXT: retq @@ -107,8 +75,8 @@ entry: ret i64 %0 } -define i64 @testmsxd(double %x) nounwind { -; X86-NOSSE-LABEL: testmsxd: +define i64 @test_llrint_i64_f64(double %x) nounwind { +; X86-NOSSE-LABEL: test_llrint_i64_f64: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp @@ -122,7 +90,7 @@ define i64 @testmsxd(double %x) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmsxd: +; X86-SSE2-LABEL: test_llrint_i64_f64: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp @@ -138,7 +106,7 @@ define i64 @testmsxd(double %x) nounwind { ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmsxd: +; X86-AVX-LABEL: test_llrint_i64_f64: ; X86-AVX: # %bb.0: # %entry ; X86-AVX-NEXT: pushl %ebp ; X86-AVX-NEXT: movl %esp, %ebp @@ -154,12 +122,12 @@ define i64 @testmsxd(double %x) nounwind { ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: testmsxd: +; 
X64-SSE-LABEL: test_llrint_i64_f64: ; X64-SSE: # %bb.0: # %entry ; X64-SSE-NEXT: cvtsd2si %xmm0, %rax ; X64-SSE-NEXT: retq ; -; X64-AVX-LABEL: testmsxd: +; X64-AVX-LABEL: test_llrint_i64_f64: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax ; X64-AVX-NEXT: retq @@ -168,8 +136,8 @@ entry: ret i64 %0 } -define i64 @testmsll(x86_fp80 %x) nounwind { -; X86-LABEL: testmsll: +define i64 @test_llrint_i64_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_llrint_i64_f80: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp @@ -183,7 +151,7 @@ define i64 @testmsll(x86_fp80 %x) nounwind { ; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; X64-LABEL: testmsll: +; X64-LABEL: test_llrint_i64_f80: ; X64: # %bb.0: # %entry ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fistpll -{{[0-9]+}}(%rsp) @@ -195,8 +163,8 @@ entry: } ; FIXME(#44744): incorrect libcall -define i64 @testmslq(fp128 %x) nounwind { -; X86-NOSSE-LABEL: testmslq: +define i64 @test_llrint_i64_f128(fp128 %x) nounwind { +; X86-NOSSE-LABEL: test_llrint_i64_f128: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, %ebp @@ -212,7 +180,7 @@ define i64 @testmslq(fp128 %x) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmslq: +; X86-SSE2-LABEL: test_llrint_i64_f128: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp @@ -228,7 +196,7 @@ define i64 @testmslq(fp128 %x) nounwind { ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmslq: +; X86-AVX-LABEL: test_llrint_i64_f128: ; X86-AVX: # %bb.0: # %entry ; X86-AVX-NEXT: pushl %ebp ; X86-AVX-NEXT: movl %esp, %ebp @@ -241,11 +209,181 @@ define i64 @testmslq(fp128 %x) nounwind { ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl ; -; X64-LABEL: testmslq: +; X64-LABEL: test_llrint_i64_f128: ; X64: # %bb.0: # %entry ; X64-NEXT: jmp llrintl@PLT # TAILCALL entry: - %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x) + %0 = 
tail call i64 @llvm.llrint.i64.f128(fp128 %x) + ret i64 %0 +} + +; FIXME: crash +; define i64 @test_llrint_i64_f16_strict(half %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %0 +; } + +define i64 @test_llrint_i64_f32_strict(float %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_llrint_i64_f32_strict: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll llrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llrint_i64_f32_strict: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll llrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: test_llrint_i64_f32_strict: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; X86-AVX-NEXT: calll llrintf +; X86-AVX-NEXT: popl %ecx +; X86-AVX-NEXT: retl +; +; X64-LABEL: test_llrint_i64_f32_strict: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq llrintf@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %0 +} + +define i64 @test_llrint_i64_f64_strict(double %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_llrint_i64_f64_strict: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll llrint +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llrint_i64_f64_strict: 
+; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: calll llrint +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: test_llrint_i64_f64_strict: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: calll llrint +; X86-AVX-NEXT: addl $8, %esp +; X86-AVX-NEXT: retl +; +; X64-LABEL: test_llrint_i64_f64_strict: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq llrint@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %0 +} + +define i64 @test_llrint_i64_f80_strict(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: test_llrint_i64_f80_strict: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll llrintl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: test_llrint_i64_f80_strict: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq llrintl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %0 +} + +; FIXME(#44744): incorrect libcall +define i64 @test_llrint_i64_f128_strict(fp128 %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_llrint_i64_f128_strict: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: pushl 20(%ebp) +; X86-NOSSE-NEXT: pushl 16(%ebp) +; X86-NOSSE-NEXT: pushl 
12(%ebp) +; X86-NOSSE-NEXT: pushl 8(%ebp) +; X86-NOSSE-NEXT: calll llrintl +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llrint_i64_f128_strict: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll llrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: test_llrint_i64_f128_strict: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll llrintl +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: test_llrint_i64_f128_strict: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq llrintl@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") ret i64 %0 } diff --git a/llvm/test/CodeGen/X86/llround-conv.ll b/llvm/test/CodeGen/X86/llround-conv.ll index 19a980b72809..ef4df82e9e57 100644 --- a/llvm/test/CodeGen/X86/llround-conv.ll +++ b/llvm/test/CodeGen/X86/llround-conv.ll @@ -1,88 +1,84 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s 
-mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -define i64 @testmsxs(float %x) { -; X86-LABEL: testmsxs: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fstps (%esp) -; X86-NEXT: calll llroundf -; X86-NEXT: popl %ecx -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; FIXME: crash +; define i64 @test_llround_f16(half %x) nounwind { +; %conv = tail call i64 @llvm.llround.f16(half %x) +; ret i64 %conv +; } + +define i64 @test_llround_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_llround_f32: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl ; -; SSE2-LABEL: testmsxs: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pushl %eax -; SSE2-NEXT: .cfi_def_cfa_offset 8 -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss %xmm0, (%esp) -; SSE2-NEXT: calll llroundf -; SSE2-NEXT: popl %ecx -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; SSE2-NEXT: retl +; X86-SSE2-LABEL: test_llround_f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll llroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl ; -; GISEL-X86-LABEL: testmsxs: -; GISEL-X86: # %bb.0: # %entry +; X64-LABEL: test_llround_f32: +; X64: # %bb.0: +; X64-NEXT: jmp llroundf@PLT # TAILCALL +; +; GISEL-X86-LABEL: 
test_llround_f32: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl %eax, (%esp) ; GISEL-X86-NEXT: calll llroundf ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmsxs: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp llroundf@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsxs: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: test_llround_f32: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: pushq %rax -; GISEL-X64-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X64-NEXT: callq llroundf ; GISEL-X64-NEXT: popq %rcx -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i64 @llvm.llround.f32(float %x) - ret i64 %0 + %conv = tail call i64 @llvm.llround.f32(float %x) + ret i64 %conv } -define i64 @testmsxd(double %x) { -; X86-LABEL: testmsxd: -; X86: # %bb.0: # %entry -; X86-NEXT: subl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: fstpl (%esp) -; X86-NEXT: calll llround -; X86-NEXT: addl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +define i64 @test_llround_f64(double %x) nounwind { +; X86-NOSSE-LABEL: test_llround_f64: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: calll llround +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl ; -; SSE2-LABEL: testmsxd: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: subl $8, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 12 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: movsd %xmm0, (%esp) -; SSE2-NEXT: calll llround -; SSE2-NEXT: addl $8, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; SSE2-NEXT: retl +; X86-SSE2-LABEL: test_llround_f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, 
(%esp) +; X86-SSE2-NEXT: calll llround +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl ; -; GISEL-X86-LABEL: testmsxd: -; GISEL-X86: # %bb.0: # %entry +; X64-LABEL: test_llround_f64: +; X64: # %bb.0: +; X64-NEXT: jmp llround@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_llround_f64: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GISEL-X86-NEXT: movl 4(%eax), %eax @@ -92,111 +88,140 @@ define i64 @testmsxd(double %x) { ; GISEL-X86-NEXT: movl %eax, 4(%edx) ; GISEL-X86-NEXT: calll llround ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmsxd: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp llround@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsxd: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: test_llround_f64: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: pushq %rax -; GISEL-X64-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X64-NEXT: callq llround ; GISEL-X64-NEXT: popq %rcx -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i64 @llvm.llround.f64(double %x) - ret i64 %0 + %conv = tail call i64 @llvm.llround.f64(double %x) + ret i64 %conv } -define i64 @testmsll(x86_fp80 %x) { -; X86-LABEL: testmsll: -; X86: # %bb.0: # %entry +define i64 @test_llround_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_llround_f80: +; X86: # %bb.0: ; X86-NEXT: subl $12, %esp -; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fstpt (%esp) ; X86-NEXT: calll llroundl ; X86-NEXT: addl $12, %esp -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; -; SSE2-LABEL: testmsll: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: subl $12, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: fldt {{[0-9]+}}(%esp) -; SSE2-NEXT: fstpt (%esp) -; SSE2-NEXT: calll llroundl -; SSE2-NEXT: addl $12, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; 
SSE2-NEXT: retl +; X64-LABEL: test_llround_f80: +; X64: # %bb.0: +; X64-NEXT: jmp llroundl@PLT # TAILCALL ; -; GISEL-X86-LABEL: testmsll: -; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-LABEL: test_llround_f80: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fstpt (%esp) ; GISEL-X86-NEXT: calll llroundl ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmsll: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp llroundl@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsll: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: test_llround_f80: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: subq $24, %rsp -; GISEL-X64-NEXT: .cfi_def_cfa_offset 32 ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) ; GISEL-X64-NEXT: callq llroundl ; GISEL-X64-NEXT: addq $24, %rsp -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i64 @llvm.llround.f80(x86_fp80 %x) - ret i64 %0 + %conv = tail call i64 @llvm.llround.f80(x86_fp80 %x) + ret i64 %conv } -define i64 @test_llround_i64_f32(float %x) nounwind { -; X86-LABEL: test_llround_i64_f32: +; FIXME(#44744): incorrect libcall +define i64 @test_llround_f128(fp128 %x) nounwind { +; X86-LABEL: test_llround_f128: ; X86: # %bb.0: -; X86-NEXT: pushl %eax -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fstps (%esp) -; X86-NEXT: calll llroundf -; X86-NEXT: popl %ecx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll llroundl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; -; SSE2-LABEL: test_llround_i64_f32: -; SSE2: # %bb.0: -; SSE2-NEXT: pushl %eax -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 
-; SSE2-NEXT: movss %xmm0, (%esp) -; SSE2-NEXT: calll llroundf -; SSE2-NEXT: popl %ecx -; SSE2-NEXT: retl +; X64-LABEL: test_llround_f128: +; X64: # %bb.0: +; X64-NEXT: jmp llroundl@PLT # TAILCALL ; -; GISEL-X86-LABEL: test_llround_i64_f32: +; GISEL-X86-LABEL: test_llround_f128: ; GISEL-X86: # %bb.0: -; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; GISEL-X86-NEXT: movl %eax, (%esp) -; GISEL-X86-NEXT: calll llroundf -; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll llroundf128 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi ; GISEL-X86-NEXT: retl ; +; GISEL-X64-LABEL: test_llround_f128: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq llroundf128 +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = tail call i64 @llvm.llround.f128(fp128 %x) + ret i64 %conv +} + +; FIXME: crash +; define i64 @test_llround_i64_f16(half %x) nounwind { +; %conv = call i64 @llvm.llround.i64.f16(half %x) +; ret i64 %conv +; } + +define i64 @test_llround_i64_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_llround_i64_f32: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llround_i64_f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll llroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; ; X64-LABEL: test_llround_i64_f32: ; X64: # 
%bb.0: ; X64-NEXT: jmp llroundf@PLT # TAILCALL ; +; GISEL-X86-LABEL: test_llround_i64_f32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll llroundf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl +; ; GISEL-X64-LABEL: test_llround_i64_f32: ; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: pushq %rax @@ -208,23 +233,27 @@ define i64 @test_llround_i64_f32(float %x) nounwind { } define i64 @test_llround_i64_f64(double %x) nounwind { -; X86-LABEL: test_llround_i64_f64: -; X86: # %bb.0: -; X86-NEXT: subl $8, %esp -; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: fstpl (%esp) -; X86-NEXT: calll llround -; X86-NEXT: addl $8, %esp -; X86-NEXT: retl +; X86-NOSSE-LABEL: test_llround_i64_f64: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: calll llround +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llround_i64_f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: calll llround +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl ; -; SSE2-LABEL: test_llround_i64_f64: -; SSE2: # %bb.0: -; SSE2-NEXT: subl $8, %esp -; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: movsd %xmm0, (%esp) -; SSE2-NEXT: calll llround -; SSE2-NEXT: addl $8, %esp -; SSE2-NEXT: retl +; X64-LABEL: test_llround_i64_f64: +; X64: # %bb.0: +; X64-NEXT: jmp llround@PLT # TAILCALL ; ; GISEL-X86-LABEL: test_llround_i64_f64: ; GISEL-X86: # %bb.0: @@ -240,10 +269,6 @@ define i64 @test_llround_i64_f64(double %x) nounwind { ; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl ; -; X64-LABEL: test_llround_i64_f64: -; X64: # %bb.0: -; X64-NEXT: jmp llround@PLT # TAILCALL -; ; GISEL-X64-LABEL: test_llround_i64_f64: ; GISEL-X64: # %bb.0: ; 
GISEL-X64-NEXT: pushq %rax @@ -264,14 +289,9 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind { ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; -; SSE2-LABEL: test_llround_i64_f80: -; SSE2: # %bb.0: -; SSE2-NEXT: subl $12, %esp -; SSE2-NEXT: fldt {{[0-9]+}}(%esp) -; SSE2-NEXT: fstpt (%esp) -; SSE2-NEXT: calll llroundl -; SSE2-NEXT: addl $12, %esp -; SSE2-NEXT: retl +; X64-LABEL: test_llround_i64_f80: +; X64: # %bb.0: +; X64-NEXT: jmp llroundl@PLT # TAILCALL ; ; GISEL-X86-LABEL: test_llround_i64_f80: ; GISEL-X86: # %bb.0: @@ -282,10 +302,6 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind { ; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl ; -; X64-LABEL: test_llround_i64_f80: -; X64: # %bb.0: -; X64-NEXT: jmp llroundl@PLT # TAILCALL -; ; GISEL-X64-LABEL: test_llround_i64_f80: ; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: subq $24, %rsp @@ -297,3 +313,79 @@ define i64 @test_llround_i64_f80(x86_fp80 %x) nounwind { %conv = call i64 @llvm.llround.i64.f80(x86_fp80 %x) ret i64 %conv } + +; FIXME(#44744): incorrect libcall +define i64 @test_llround_i64_f128(fp128 %x) nounwind { +; X86-LABEL: test_llround_i64_f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll llroundl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_llround_i64_f128: +; X64: # %bb.0: +; X64-NEXT: jmp llroundl@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_llround_i64_f128: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll llroundf128 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_llround_i64_f128: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq llroundf128 +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = call i64 @llvm.llround.i64.f128(fp128 %x) + ret i64 %conv +} + +; FIXME: not yet implemented for global isel +; define i64 @test_llround_i64_f16_strict(half %x) nounwind strictfp { +; %conv = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } + +; define i64 @test_llround_i64_f32_strict(float %x) nounwind strictfp { +; %conv = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } + +; define i64 @test_llround_i64_f64_strict(double %x) nounwind strictfp { +; %conv = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } + +; define i64 @test_llround_i64_f80_strict(x86_fp80 %x) nounwind strictfp { +; %conv = call i64 @llvm.experimental.constrained.llround.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } + +; ; FIXME(#44744): incorrect libcall +; define i64 @test_llround_i64_f128_strict(fp128 %x) nounwind strictfp { +; %conv = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll index 3c50aea1095f..2b99b4c50f58 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -8,15 +8,15 @@ ; RUN: llc < %s 
-mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX ; FIXME: crash -; define i32 @testmswh(half %x) nounwind { +; define i32 @test_lrint_i32_f16(half %x) nounwind { ; entry: ; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) ; ret i32 %0 ; } -define i32 @testmsws(float %x) nounwind { -; X86-NOSSE-LABEL: testmsws: -; X86-NOSSE: # %bb.0: # %entry +define i32 @test_lrint_i32_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i32_f32: +; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl %eax ; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpl (%esp) @@ -24,33 +24,32 @@ define i32 @testmsws(float %x) nounwind { ; X86-NOSSE-NEXT: popl %ecx ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmsws: -; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-LABEL: test_lrint_i32_f32: +; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmsws: -; X86-AVX: # %bb.0: # %entry +; X86-AVX-LABEL: test_lrint_i32_f32: +; X86-AVX: # %bb.0: ; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: testmsws: -; X64-SSE: # %bb.0: # %entry +; X64-SSE-LABEL: test_lrint_i32_f32: +; X64-SSE: # %bb.0: ; X64-SSE-NEXT: cvtss2si %xmm0, %eax ; X64-SSE-NEXT: retq ; -; X64-AVX-LABEL: testmsws: -; X64-AVX: # %bb.0: # %entry +; X64-AVX-LABEL: test_lrint_i32_f32: +; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vcvtss2si %xmm0, %eax ; X64-AVX-NEXT: retq -entry: - %0 = tail call i32 @llvm.lrint.i32.f32(float %x) - ret i32 %0 + %conv = tail call i32 @llvm.lrint.i32.f32(float %x) + ret i32 %conv } -define i32 @testmswd(double %x) nounwind { -; X86-NOSSE-LABEL: testmswd: -; X86-NOSSE: # %bb.0: # %entry +define i32 @test_lrint_i32_f64(double %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i32_f64: +; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl %eax ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpl (%esp) @@ -58,33 +57,32 @@ define i32 @testmswd(double %x) nounwind { ; X86-NOSSE-NEXT: popl 
%ecx ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmswd: -; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-LABEL: test_lrint_i32_f64: +; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmswd: -; X86-AVX: # %bb.0: # %entry +; X86-AVX-LABEL: test_lrint_i32_f64: +; X86-AVX: # %bb.0: ; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: testmswd: -; X64-SSE: # %bb.0: # %entry +; X64-SSE-LABEL: test_lrint_i32_f64: +; X64-SSE: # %bb.0: ; X64-SSE-NEXT: cvtsd2si %xmm0, %eax ; X64-SSE-NEXT: retq ; -; X64-AVX-LABEL: testmswd: -; X64-AVX: # %bb.0: # %entry +; X64-AVX-LABEL: test_lrint_i32_f64: +; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax ; X64-AVX-NEXT: retq -entry: - %0 = tail call i32 @llvm.lrint.i32.f64(double %x) - ret i32 %0 + %conv = tail call i32 @llvm.lrint.i32.f64(double %x) + ret i32 %conv } -define i32 @testmsll(x86_fp80 %x) nounwind { -; X86-LABEL: testmsll: -; X86: # %bb.0: # %entry +define i32 @test_lrint_i32_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_lrint_i32_f80: +; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fistpl (%esp) @@ -92,21 +90,20 @@ define i32 @testmsll(x86_fp80 %x) nounwind { ; X86-NEXT: popl %ecx ; X86-NEXT: retl ; -; X64-LABEL: testmsll: -; X64: # %bb.0: # %entry +; X64-LABEL: test_lrint_i32_f80: +; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fistpl -{{[0-9]+}}(%rsp) ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: retq -entry: - %0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x) - ret i32 %0 + %conv = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x) + ret i32 %conv } ; FIXME(#44744): incorrect libcall -define i32 @testmswq(fp128 %x) nounwind { -; X86-NOSSE-LABEL: testmswq: -; X86-NOSSE: # %bb.0: # %entry +define i32 @test_lrint_i32_f128(fp128 %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i32_f128: +; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl %ebp ; X86-NOSSE-NEXT: movl %esp, 
%ebp ; X86-NOSSE-NEXT: andl $-16, %esp @@ -121,8 +118,8 @@ define i32 @testmswq(fp128 %x) nounwind { ; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; -; X86-SSE2-LABEL: testmswq: -; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-LABEL: test_lrint_i32_f128: +; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp ; X86-SSE2-NEXT: andl $-16, %esp @@ -137,8 +134,8 @@ define i32 @testmswq(fp128 %x) nounwind { ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; -; X86-AVX-LABEL: testmswq: -; X86-AVX: # %bb.0: # %entry +; X86-AVX-LABEL: test_lrint_i32_f128: +; X86-AVX: # %bb.0: ; X86-AVX-NEXT: pushl %ebp ; X86-AVX-NEXT: movl %esp, %ebp ; X86-AVX-NEXT: andl $-16, %esp @@ -150,12 +147,176 @@ define i32 @testmswq(fp128 %x) nounwind { ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl ; -; X64-LABEL: testmswq: -; X64: # %bb.0: # %entry +; X64-LABEL: test_lrint_i32_f128: +; X64: # %bb.0: ; X64-NEXT: jmp lrintl@PLT # TAILCALL -entry: - %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x) - ret i32 %0 + %conv = tail call i32 @llvm.lrint.i32.f128(fp128 %x) + ret i32 %conv +} + +; FIXME: crash +; define i32 @test_lrint_i32_f16_strict(half %x) nounwind strictfp { +; %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } + +define i32 @test_lrint_i32_f32_strict(float %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_lrint_i32_f32_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll lrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i32_f32_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll lrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: 
test_lrint_i32_f32_strict: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintf +; X86-AVX-NEXT: popl %ecx +; X86-AVX-NEXT: retl +; +; X64-LABEL: test_lrint_i32_f32_strict: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq lrintf@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i32 %conv +} + +define i32 @test_lrint_i32_f64_strict(double %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_lrint_i32_f64_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll lrint +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i32_f64_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: calll lrint +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: test_lrint_i32_f64_strict: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: calll lrint +; X86-AVX-NEXT: addl $8, %esp +; X86-AVX-NEXT: retl +; +; X64-LABEL: test_lrint_i32_f64_strict: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq lrint@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i32 %conv +} + +define i32 @test_lrint_i32_f80_strict(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: test_lrint_i32_f80_strict: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt 
(%esp) +; X86-NEXT: wait +; X86-NEXT: calll lrintl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: test_lrint_i32_f80_strict: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq lrintl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq + %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i32 %conv +} + +; FIXME(#44744): incorrect libcall +define i32 @test_lrint_i32_f128_strict(fp128 %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_lrint_i32_f128_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: pushl 20(%ebp) +; X86-NOSSE-NEXT: pushl 16(%ebp) +; X86-NOSSE-NEXT: pushl 12(%ebp) +; X86-NOSSE-NEXT: pushl 8(%ebp) +; X86-NOSSE-NEXT: calll lrintl +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i32_f128_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: test_lrint_i32_f128_strict: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: 
test_lrint_i32_f128_strict: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq lrintl@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i32 %conv } declare i32 @llvm.lrint.i32.f32(float) nounwind readnone diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll index 2ba1500df0b6..731c03bf0d74 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll @@ -1,92 +1,311 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX -define i64 @testmsxh(half %x) nounwind { -; SSE-LABEL: testmsxh: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pushq %rax -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: callq rintf@PLT -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: cvttss2si %xmm0, %rax -; SSE-NEXT: popq %rcx -; SSE-NEXT: retq -entry: - %0 = tail call i64 @llvm.lrint.i64.f16(half %x) - ret i64 %0 -} +; FIXME: crash +; define i64 @test_lrint_i64_f16(half %x) nounwind { +; %conv = tail call i64 @llvm.lrint.i64.f16(half %x) +; ret i64 %conv +; } -define i64 @testmsxs(float %x) nounwind { -; SSE-LABEL: testmsxs: -; SSE: # %bb.0: # %entry +define i64 @test_lrint_i64_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f32: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; 
X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: flds 8(%ebp) +; X86-NOSSE-NEXT: fistpll (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i64_f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; SSE-LABEL: test_lrint_i64_f32: +; SSE: # %bb.0: ; SSE-NEXT: cvtss2si %xmm0, %rax ; SSE-NEXT: retq ; -; AVX-LABEL: testmsxs: -; AVX: # %bb.0: # %entry +; AVX-LABEL: test_lrint_i64_f32: +; AVX: # %bb.0: ; AVX-NEXT: vcvtss2si %xmm0, %rax ; AVX-NEXT: retq -entry: - %0 = tail call i64 @llvm.lrint.i64.f32(float %x) - ret i64 %0 + %conv = tail call i64 @llvm.lrint.i64.f32(float %x) + ret i64 %conv } -define i64 @testmsxd(double %x) nounwind { -; SSE-LABEL: testmsxd: -; SSE: # %bb.0: # %entry +define i64 @test_lrint_i64_f64(double %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f64: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl 8(%ebp) +; X86-NOSSE-NEXT: fistpll (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i64_f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} 
xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; SSE-LABEL: test_lrint_i64_f64: +; SSE: # %bb.0: ; SSE-NEXT: cvtsd2si %xmm0, %rax ; SSE-NEXT: retq ; -; AVX-LABEL: testmsxd: -; AVX: # %bb.0: # %entry +; AVX-LABEL: test_lrint_i64_f64: +; AVX: # %bb.0: ; AVX-NEXT: vcvtsd2si %xmm0, %rax ; AVX-NEXT: retq -entry: - %0 = tail call i64 @llvm.lrint.i64.f64(double %x) - ret i64 %0 + %conv = tail call i64 @llvm.lrint.i64.f64(double %x) + ret i64 %conv } -define i64 @testmsll(x86_fp80 %x) nounwind { -; CHECK-LABEL: testmsll: -; CHECK: # %bb.0: # %entry +define i64 @test_lrint_i64_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_lrint_i64_f80: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f80: +; CHECK: # %bb.0: ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fistpll -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: retq -entry: - %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x) - ret i64 %0 + %conv = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x) + ret i64 %conv } ; FIXME(#44744): incorrect libcall -define i64 @testmsxq(fp128 %x) nounwind { -; CHECK-LABEL: testmsxq: -; CHECK: # %bb.0: # %entry +define i64 @test_lrint_i64_f128(fp128 %x) nounwind { +; X86-LABEL: test_lrint_i64_f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 
8(%ebp) +; X86-NEXT: calll lrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f128: +; CHECK: # %bb.0: ; CHECK-NEXT: jmp lrintl@PLT # TAILCALL -entry: - %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) - ret i64 %0 + %conv = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + ret i64 %conv +} + +; FIXME: crash +; define i64 @test_lrint_i64_f16_strict(half %x) nounwind { +; %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %conv +; } + +define i64 @test_lrint_i64_f32_strict(float %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f32_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i64_f32_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll lrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f32_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq lrintf@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %conv +} + +define i64 @test_lrint_i64_f64_strict(double %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f64_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: calll lrint +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i64_f64_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} 
xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: calll lrint +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f64_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq lrint@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %conv +} + +define i64 @test_lrint_i64_f80_strict(x86_fp80 %x) nounwind { +; X86-LABEL: test_lrint_i64_f80_strict: +; X86: # %bb.0: +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: calll lrintl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f80_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq lrintl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %conv +} + +; FIXME(#44744): incorrect libcall +define i64 @test_lrint_i64_f128_strict(fp128 %x) nounwind { +; X86-LABEL: test_lrint_i64_f128_strict: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll lrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; CHECK-LABEL: test_lrint_i64_f128_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq lrintl@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %conv } define i32 @PR125324(float %x) 
nounwind { +; X86-NOSSE-LABEL: PR125324: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: flds 8(%ebp) +; X86-NOSSE-NEXT: fistpll (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: PR125324: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; ; SSE-LABEL: PR125324: -; SSE: # %bb.0: # %entry +; SSE: # %bb.0: ; SSE-NEXT: cvtss2si %xmm0, %rax ; SSE-NEXT: # kill: def $eax killed $eax killed $rax ; SSE-NEXT: retq ; ; AVX-LABEL: PR125324: -; AVX: # %bb.0: # %entry +; AVX: # %bb.0: ; AVX-NEXT: vcvtss2si %xmm0, %rax ; AVX-NEXT: # kill: def $eax killed $eax killed $rax ; AVX-NEXT: retq -entry: - %0 = tail call i64 @llvm.lrint.i64.f32(float %x) - %1 = trunc i64 %0 to i32 - ret i32 %1 + %conv = tail call i64 @llvm.lrint.i64.f32(float %x) + %trunc = trunc i64 %conv to i32 + ret i32 %trunc } declare i64 @llvm.lrint.i64.f32(float) nounwind readnone diff --git a/llvm/test/CodeGen/X86/lround-conv-i32.ll b/llvm/test/CodeGen/X86/lround-conv-i32.ll index c37536623143..389f29233dcc 100644 --- a/llvm/test/CodeGen/X86/lround-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lround-conv-i32.ll @@ -1,17 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: 
llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -define i32 @testmsws(float %x) nounwind { -; CHECK-LABEL: testmsws: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lroundf # TAILCALL +; FIXME: crash +; define i32 @test_lround_i32_f16(half %x) nounwind { +; %conv = tail call i32 @llvm.lround.i32.f16(half %x) +; ret i32 %conv +; } + +define i32 @test_lround_i32_f32(float %x) nounwind { +; X86-LABEL: test_lround_i32_f32: +; X86: # %bb.0: +; X86-NEXT: jmp lroundf # TAILCALL +; +; X64-LABEL: test_lround_i32_f32: +; X64: # %bb.0: +; X64-NEXT: jmp lroundf@PLT # TAILCALL ; -; GISEL-X86-LABEL: testmsws: -; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-LABEL: test_lround_i32_f32: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl %eax, (%esp) @@ -19,28 +29,27 @@ define i32 @testmsws(float %x) nounwind { ; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmsws: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp lroundf@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsws: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: test_lround_i32_f32: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: pushq %rax ; GISEL-X64-NEXT: callq lroundf ; GISEL-X64-NEXT: popq %rcx ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i32 @llvm.lround.i32.f32(float %x) - ret i32 %0 + %conv = tail call i32 @llvm.lround.i32.f32(float %x) + ret i32 %conv } -define i32 @testmswd(double %x) nounwind { -; CHECK-LABEL: testmswd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lround # TAILCALL +define i32 
@test_lround_i32_f64(double %x) nounwind { +; X86-LABEL: test_lround_i32_f64: +; X86: # %bb.0: +; X86-NEXT: jmp lround # TAILCALL +; +; X64-LABEL: test_lround_i32_f64: +; X64: # %bb.0: +; X64-NEXT: jmp lround@PLT # TAILCALL ; -; GISEL-X86-LABEL: testmswd: -; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-LABEL: test_lround_i32_f64: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -53,28 +62,27 @@ define i32 @testmswd(double %x) nounwind { ; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmswd: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp lround@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmswd: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: test_lround_i32_f64: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: pushq %rax ; GISEL-X64-NEXT: callq lround ; GISEL-X64-NEXT: popq %rcx ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i32 @llvm.lround.i32.f64(double %x) - ret i32 %0 + %conv = tail call i32 @llvm.lround.i32.f64(double %x) + ret i32 %conv } -define i32 @testmsll(x86_fp80 %x) nounwind { -; CHECK-LABEL: testmsll: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lroundl # TAILCALL +define i32 @test_lround_i32_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_lround_i32_f80: +; X86: # %bb.0: +; X86-NEXT: jmp lroundl # TAILCALL +; +; X64-LABEL: test_lround_i32_f80: +; X64: # %bb.0: +; X64-NEXT: jmp lroundl@PLT # TAILCALL ; -; GISEL-X86-LABEL: testmsll: -; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-LABEL: test_lround_i32_f80: +; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fstpt (%esp) @@ -82,19 +90,91 @@ define i32 @testmsll(x86_fp80 %x) nounwind { ; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl ; -; X64-LABEL: testmsll: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp lroundl@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsll: -; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-LABEL: 
test_lround_i32_f80: +; GISEL-X64: # %bb.0: ; GISEL-X64-NEXT: subq $24, %rsp ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) ; GISEL-X64-NEXT: callq lroundl ; GISEL-X64-NEXT: addq $24, %rsp ; GISEL-X64-NEXT: retq -entry: - %0 = tail call i32 @llvm.lround.i32.f80(x86_fp80 %x) - ret i32 %0 + %conv = tail call i32 @llvm.lround.i32.f80(x86_fp80 %x) + ret i32 %conv } + +define i32 @test_lround_i32_f128(fp128 %x) nounwind { +; X86-LABEL: test_lround_i32_f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll lroundl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_lround_i32_f128: +; X64: # %bb.0: +; X64-NEXT: jmp lroundl@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_lround_i32_f128: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll lroundf128 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_lround_i32_f128: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq lroundf128 +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = tail call i32 @llvm.lround.i32.f128(fp128 %x) + ret i32 %conv +} + +; FIXME: not yet implemented in global isel +; define i32 @test_lround_i32_f16_strict(half %x) nounwind strictfp { +; %conv = tail call i32 
@llvm.experimental.constrained.lround.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } + +; define i32 @test_lround_i32_f32_strict(float %x) nounwind strictfp { +; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } + +; define i32 @test_lround_i32_f64_strict(double %x) nounwind strictfp { +; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } + +; define i32 @test_lround_i32_f80_strict(x86_fp80 %x) nounwind strictfp { +; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } + +; define i32 @test_lround_i32_f128_strict(fp128 %x) nounwind strictfp { +; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i32 %conv +; } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; X86-NOSSE: {{.*}} +; X86-SSE2: {{.*}} diff --git a/llvm/test/CodeGen/X86/lround-conv-i64.ll b/llvm/test/CodeGen/X86/lround-conv-i64.ll index 36b86f30ca13..8b8230074728 100644 --- a/llvm/test/CodeGen/X86/lround-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lround-conv-i64.ll @@ -1,42 +1,86 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -define i64 @testmsxs(float %x) { -; GISEL-X86-LABEL: testmsxs: +; FIXME: crash +; define i64 @test_lround_i64_f16(half %x) nounwind { +; entry: +; %0 = tail call i64 @llvm.lround.i64.f16(half %x) +; ret i64 %0 +; } + +define i64 @test_lround_i64_f32(float %x) nounwind { +; X86-NOSSE-LABEL: test_lround_i64_f32: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lround_i64_f32: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: calll lroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_lround_i64_f32: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp lroundf@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_lround_i64_f32: ; GISEL-X86: # %bb.0: # %entry ; GISEL-X86-NEXT: subl $12, %esp -; 
GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl %eax, (%esp) ; GISEL-X86-NEXT: calll lroundf ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; CHECK-LABEL: testmsxs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lroundf@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsxs: +; GISEL-X64-LABEL: test_lround_i64_f32: ; GISEL-X64: # %bb.0: # %entry ; GISEL-X64-NEXT: pushq %rax -; GISEL-X64-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X64-NEXT: callq lroundf ; GISEL-X64-NEXT: popq %rcx -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq entry: %0 = tail call i64 @llvm.lround.i64.f32(float %x) ret i64 %0 } -define i64 @testmsxd(double %x) { -; GISEL-X86-LABEL: testmsxd: +define i64 @test_lround_i64_f64(double %x) nounwind { +; X86-NOSSE-LABEL: test_lround_i64_f64: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fstpl (%esp) +; X86-NOSSE-NEXT: calll lround +; X86-NOSSE-NEXT: addl $8, %esp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lround_i64_f64: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: calll lround +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_lround_i64_f64: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp lround@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_lround_i64_f64: ; GISEL-X86: # %bb.0: # %entry ; GISEL-X86-NEXT: subl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; GISEL-X86-NEXT: movl 4(%eax), %eax @@ -46,53 +90,131 @@ define i64 @testmsxd(double %x) { ; GISEL-X86-NEXT: movl %eax, 4(%edx) ; GISEL-X86-NEXT: calll lround ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; 
CHECK-LABEL: testmsxd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lround@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsxd: +; GISEL-X64-LABEL: test_lround_i64_f64: ; GISEL-X64: # %bb.0: # %entry ; GISEL-X64-NEXT: pushq %rax -; GISEL-X64-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X64-NEXT: callq lround ; GISEL-X64-NEXT: popq %rcx -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq entry: %0 = tail call i64 @llvm.lround.i64.f64(double %x) ret i64 %0 } -define i64 @testmsll(x86_fp80 %x) { -; GISEL-X86-LABEL: testmsll: +define i64 @test_lround_i64_f80(x86_fp80 %x) nounwind { +; X86-LABEL: test_lround_i64_f80: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: calll lroundl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: test_lround_i64_f80: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp lroundl@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_lround_i64_f80: ; GISEL-X86: # %bb.0: # %entry ; GISEL-X86-NEXT: subl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 16 ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fstpt (%esp) ; GISEL-X86-NEXT: calll lroundl ; GISEL-X86-NEXT: addl $12, %esp -; GISEL-X86-NEXT: .cfi_def_cfa_offset 4 ; GISEL-X86-NEXT: retl ; -; CHECK-LABEL: testmsll: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lroundl@PLT # TAILCALL -; -; GISEL-X64-LABEL: testmsll: +; GISEL-X64-LABEL: test_lround_i64_f80: ; GISEL-X64: # %bb.0: # %entry ; GISEL-X64-NEXT: subq $24, %rsp -; GISEL-X64-NEXT: .cfi_def_cfa_offset 32 ; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) ; GISEL-X64-NEXT: fstpt (%rsp) ; GISEL-X64-NEXT: callq lroundl ; GISEL-X64-NEXT: addq $24, %rsp -; GISEL-X64-NEXT: .cfi_def_cfa_offset 8 ; GISEL-X64-NEXT: retq entry: %0 = tail call i64 @llvm.lround.i64.f80(x86_fp80 %x) ret i64 %0 } + +define i64 @test_lround_i64_f128(fp128 %x) nounwind { +; X86-LABEL: test_lround_i64_f128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: 
andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll lroundl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_lround_i64_f128: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp lroundl@PLT # TAILCALL +; +; GISEL-X86-LABEL: test_lround_i64_f128: +; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $24, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll lroundf128 +; GISEL-X86-NEXT: addl $24, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_lround_i64_f128: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq lroundf128 +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x) + ret i64 %0 +} + +; FIXME: not yet implemented in global isel +; define i64 @test_lround_i64_f16_strict(half %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %0 +; } + +; define i64 @test_lround_i64_f32_strict(float %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f32(float %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %0 +; } + +; define i64 @test_lround_i64_f64_strict(double %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f64(double %x, metadata!"round.dynamic", 
metadata!"fpexcept.strict") +; ret i64 %0 +; } + +; define i64 @test_lround_i64_f80_strict(x86_fp80 %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f80(x86_fp80 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %0 +; } + +; define i64 @test_lround_i64_f128_strict(fp128 %x) nounwind strictfp { +; entry: +; %0 = tail call i64 @llvm.experimental.constrained.lround.i64.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") +; ret i64 %0 +; } diff --git a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll index faa8bff8f094..954985a3798b 100644 --- a/llvm/test/CodeGen/X86/lvi-hardening-ret.ll +++ b/llvm/test/CodeGen/X86/lvi-hardening-ret.ll @@ -41,9 +41,9 @@ entry: %add = add nsw i32 %0, %1 ret i32 %add ; CHECK-NOT: retq -; CHECK: popq %rcx +; CHECK: popq %rsi ; CHECK-NEXT: lfence -; CHECK-NEXT: jmpq *%rcx +; CHECK-NEXT: jmpq *%rsi } ; Function Attrs: noinline nounwind optnone uwtable @@ -52,9 +52,9 @@ define dso_local preserve_mostcc void @preserve_most() #0 { entry: ret void ; CHECK-NOT: retq -; CHECK: popq %rax +; CHECK: popq %r11 ; CHECK-NEXT: lfence -; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: jmpq *%r11 } ; Function Attrs: noinline nounwind optnone uwtable @@ -63,9 +63,9 @@ define dso_local preserve_allcc void @preserve_all() #0 { entry: ret void ; CHECK-NOT: retq -; CHECK: popq %rax +; CHECK: popq %r11 ; CHECK-NEXT: lfence -; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: jmpq *%r11 } define { i64, i128 } @ret_i64_i128() #0 { diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll index fb2433dbbb1e..7c9adaf31aff 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll @@ -730,36 +730,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vmovdqa (%rdi), %xmm2 ; 
AVX1-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5 ; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7 ; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9 +; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9 ; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 -; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 -; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7 +; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7 ; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 -; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 +; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; 
AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -767,20 +767,20 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; AVX2-LABEL: vec256_i64_signed_mem_reg: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa (%rdi), %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm3 -; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1] +; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3 ; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vpsubq %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm4 ; AVX2-NEXT: vpsrlq $33, %ymm0, %ymm0 -; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3 -; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3 -; AVX2-NEXT: vpaddq %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 +; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2 ; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2 +; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 +; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2 ; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq @@ -790,36 +790,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1 ; XOP-NEXT: vmovdqa (%rdi), %xmm2 ; XOP-NEXT: vmovdqa 16(%rdi), %xmm3 -; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4 +; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4 +; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5 ; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0 -; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5 +; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0 +; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpsubq 
%xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1 ; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6 ; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7 ; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9 +; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9 ; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7 +; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7 ; XOP-NEXT: vpmuludq %xmm7, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 -; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 +; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; XOP-NEXT: retq @@ -900,36 +900,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vmovdqa (%rdi), %xmm2 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm5 ; AVX1-NEXT: vpsubq 
%xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm3 -; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpsubq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrlq $1, %xmm3, %xmm6 ; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm7 ; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9 +; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9 ; AVX1-NEXT: vpmuludq %xmm2, %xmm9, %xmm2 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 -; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 -; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; AVX1-NEXT: vpsrlq $33, %xmm3, %xmm3 -; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7 +; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7 ; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 -; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 +; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -937,20 +937,20 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr 
%a2_addr) nounwin ; AVX2-LABEL: vec256_i64_signed_reg_mem: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa (%rdi), %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm3 -; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1] +; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3 ; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1 -; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4 ; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1 -; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3 -; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3 -; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1 +; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2 ; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2 +; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1 +; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2 ; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -960,36 +960,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1 ; XOP-NEXT: vmovdqa (%rdi), %xmm2 ; XOP-NEXT: vmovdqa 16(%rdi), %xmm3 -; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm4 +; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm4 +; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm5 ; XOP-NEXT: vpsubq %xmm2, %xmm0, %xmm2 -; XOP-NEXT: vpxor %xmm4, %xmm2, %xmm2 -; XOP-NEXT: vpsubq %xmm2, %xmm4, %xmm2 -; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm5 +; XOP-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; XOP-NEXT: vpsubq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpsubq %xmm3, %xmm1, %xmm3 -; XOP-NEXT: vpxor %xmm5, %xmm3, %xmm3 -; XOP-NEXT: vpsubq %xmm3, %xmm5, %xmm3 +; XOP-NEXT: vpxor %xmm4, %xmm3, %xmm3 +; XOP-NEXT: vpsubq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: 
vpsrlq $1, %xmm3, %xmm6 ; XOP-NEXT: vpsrlq $1, %xmm2, %xmm7 ; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9 +; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9 ; XOP-NEXT: vpmuludq %xmm2, %xmm9, %xmm2 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpsllq $32, %xmm2, %xmm2 -; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; XOP-NEXT: vpsrlq $33, %xmm3, %xmm3 -; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7 +; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7 ; XOP-NEXT: vpmuludq %xmm7, %xmm3, %xmm3 -; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 -; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 +; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpsllq $32, %xmm3, %xmm3 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; XOP-NEXT: vpaddq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; XOP-NEXT: retq @@ -1071,36 +1071,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1 ; AVX1-NEXT: vmovdqa (%rdi), %xmm2 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5 ; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: 
vpsubq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7 ; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9 +; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9 ; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 -; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 -; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7 +; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7 ; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5 -; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4 +; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -1109,20 +1109,20 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 ; AVX2-NEXT: vmovdqa (%rsi), %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, 
%ymm3 -; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1] +; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3 ; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1 -; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4 ; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1 -; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3 -; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3 -; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1 +; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2 ; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2 +; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1 +; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2 ; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1133,36 +1133,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; XOP-NEXT: vmovdqa 16(%rsi), %xmm1 ; XOP-NEXT: vmovdqa (%rdi), %xmm2 ; XOP-NEXT: vmovdqa 16(%rdi), %xmm3 -; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4 +; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4 +; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5 ; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0 -; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5 +; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0 +; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1 +; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1 ; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6 ; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7 ; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] -; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9 +; XOP-NEXT: vpor %xmm5, 
%xmm8, %xmm9 ; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 -; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4 +; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5 ; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7 +; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7 ; XOP-NEXT: vpmuludq %xmm7, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5 -; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4 +; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5 -; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4 +; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; XOP-NEXT: retq diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll index a730ef519c01..a478577155f1 100644 --- a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr) declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr) diff --git 
a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll index 583e16351652..62613d773a36 100644 --- a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll +++ b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) { ; CHECK-LABEL: test_mm_movrsb_epu8: diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 9e398096bfcc..693d1992091b 100644 --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -93,10 +93,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: pslld $31, %xmm3 ; SSE2-NEXT: por %xmm0, %xmm3 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [715827883,715827883,715827883,715827883] +; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSE4-LABEL: p4_vector_urem_by_const__splat: @@ -104,9 +102,9 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE4-NEXT: psrld $1, %xmm0 -; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882] -; 
SSE4-NEXT: pminud %xmm0, %xmm1 -; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883] +; SSE4-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: retq ; ; AVX2-LABEL: p4_vector_urem_by_const__splat: @@ -116,9 +114,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] ; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882] -; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 8d155bd57df1..1e3204dfc999 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -16,9 +16,9 @@ ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Machine Branch Probability Analysis diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll index 52bcbe9f83d7..a4af93b81023 100644 --- a/llvm/test/CodeGen/X86/peep-test-5.ll +++ b/llvm/test/CodeGen/X86/peep-test-5.ll @@ -51,3 +51,54 @@ end: } declare void @free_object() + +; Check TEST 
instruction would not be combined with CMP. +define i1 @pr155586(i8 %0) { +; CHECK-LABEL: pr155586: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpb $1, %dil +; CHECK-NEXT: setne %cl +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: sete %al +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: retq +entry: + %cmp88.not = icmp eq i8 %0, 1 + %1 = and i8 %0, 1 + %tobool161.not = icmp eq i8 %1, 0 + %common.ret.op = select i1 %cmp88.not, i1 false, i1 %tobool161.not + ret i1 %common.ret.op +} + +; Check TEST8rr instruction would not be combined with TEST8ri. +define i32 @pr155828() { +; CHECK-LABEL: pr155828: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB2_1: # %func_188.exit.i.i +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # %bb.2: # %if.else.i.i.i +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: setg %al +; CHECK-NEXT: retq +entry: + br label %func_188.exit.i.i + +func_188.exit.i.i: ; preds = %func_188.exit.i.i, %entry + %or659.i167180.i.i = phi i32 [ 0, %entry ], [ 1, %func_188.exit.i.i ] + %conv48.i.i = trunc i32 %or659.i167180.i.i to i8 + %and.i.i.i = and i32 %or659.i167180.i.i, 1 + %tobool80.not.i.i.i = icmp eq i32 %and.i.i.i, 0 + br i1 %tobool80.not.i.i.i, label %if.else.i.i.i, label %func_188.exit.i.i + +if.else.i.i.i: ; preds = %func_188.exit.i.i + %cmp183.i.i.i = icmp sgt i8 %conv48.i.i, 0 + %ext = zext i1 %cmp183.i.i.i to i32 + ret i32 %ext +} diff --git a/llvm/test/CodeGen/X86/pr156256.ll b/llvm/test/CodeGen/X86/pr156256.ll new file mode 100644 index 000000000000..13caa6fee587 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr156256.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512 +; RUN: llc < 
%s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL + +define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) { +; AVX512-LABEL: PR156256: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; AVX512-NEXT: vpmovm2d %k0, %zmm0 +; AVX512-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq +; +; AVX512VL-LABEL: PR156256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; AVX512VL-NEXT: vpmovm2d %k0, %zmm0 +; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 +; AVX512VL-NEXT: retq + %icmp = icmp ugt <16 x i32> %a, %b + %sext = sext <16 x i1> %icmp to <16 x i16> + %and = and <16 x i16> %sext, splat (i16 16256) + ret <16 x i16> %and +} diff --git a/llvm/test/CodeGen/X86/pr156817.ll b/llvm/test/CodeGen/X86/pr156817.ll new file mode 100644 index 000000000000..80972ecc5abb --- /dev/null +++ b/llvm/test/CodeGen/X86/pr156817.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -mattr=+egpr | FileCheck %s --check-prefix=EGPR + +define coldcc i32 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: retq +; +; EGPR-LABEL: foo: +; EGPR: # %bb.0: +; EGPR-NEXT: pushq %rax +; EGPR-NEXT: callq bar@PLT +; EGPR-NEXT: popq %r16 +; EGPR-NEXT: retq + %1 = tail call coldcc i32 @bar() + ret i32 %1 +} + +declare coldcc i32 @bar() diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index c3c96e822879..6a0c13526ac1 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -260,7 +260,6 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) { ; CHECK-NEXT: pushl %ebx ; 
CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: .cfi_offset %ebx, -8 -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: jmp .LBB1_1 @@ -272,10 +271,9 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) { ; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: .LBB1_1: # %for.cond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: je .LBB1_3 ; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: xorl %eax, %eax @@ -283,12 +281,11 @@ define void @verifier_error_reduced_issue38788(i1 %cmp11) { ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_3: # %if.end ; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: je .LBB1_4 ; CHECK-NEXT: # %bb.9: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: movl $0, %ebx ; CHECK-NEXT: jne .LBB1_8 diff --git a/llvm/test/CodeGen/X86/pr40289-64bit.ll b/llvm/test/CodeGen/X86/pr40289-64bit.ll index 58da5258a670..96c8377eb0f0 100644 --- a/llvm/test/CodeGen/X86/pr40289-64bit.ll +++ b/llvm/test/CodeGen/X86/pr40289-64bit.ll @@ -6,5 +6,5 @@ define cc 92 < 9 x i64 > @clobber() { ret < 9 x i64 > undef ; CHECK-LABEL: clobber: ; CHECK-NOT: popq %rsp - ; CHECK: addq $8, %rsp + ; CHECK: popq %rax } diff --git a/llvm/test/CodeGen/X86/pr40289.ll b/llvm/test/CodeGen/X86/pr40289.ll index 851b23c002bd..21e50931b40f 100644 --- a/llvm/test/CodeGen/X86/pr40289.ll +++ b/llvm/test/CodeGen/X86/pr40289.ll @@ -6,5 +6,5 @@ define < 3 x i32 > @clobber() { ret < 3 x i32 > undef ; CHECK-LABEL: clobber: ; CHECK-NOT: popl %esp - ; CHECK: addl $4, %esp + ; CHECK: popl %eax } diff --git 
a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll index 946380971988..accdd04f084d 100644 --- a/llvm/test/CodeGen/X86/pr67333.ll +++ b/llvm/test/CodeGen/X86/pr67333.ll @@ -7,19 +7,25 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0 define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 { ; CHECK-LABEL: SHA256_Compress_Generic: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movbel 0, %eax -; CHECK-NEXT: movbel 12(%rdi), %ecx +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movl 12(%rdi), %ecx +; CHECK-NEXT: #APP +; CHECK-NEXT: bswapl %ecx +; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vmovd %eax, %xmm0 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,0,1,2,3,128,128,128,128,128,128,128,128] ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 ; CHECK-NEXT: vpsrld $17, %xmm2, %xmm0 ; CHECK-NEXT: vpslld $15, %xmm2, %xmm3 -; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vpsrld $19, %xmm2, %xmm3 +; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm3 +; CHECK-NEXT: vpsrld $19, %xmm2, %xmm0 ; CHECK-NEXT: vpslld $13, %xmm2, %xmm4 -; CHECK-NEXT: vpor %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm3 -; CHECK-NEXT: vpxor %xmm2, %xmm3, %xmm0 +; CHECK-NEXT: vpor %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vmovd %ecx, %xmm4 ; CHECK-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/pr90844.ll b/llvm/test/CodeGen/X86/pr90844.ll deleted file mode 100644 index b250c3f6f9a2..000000000000 --- a/llvm/test/CodeGen/X86/pr90844.ll +++ /dev/null @@ -1,36 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s - -define void @PR90844() { -; CHECK-LABEL: PR90844: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; 
CHECK-NEXT: vmovaps %xmm0, (%rax) -; CHECK-NEXT: retq -entry: - %0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>) - %1 = and <2 x i32> %0, <i32 16711935, i32 -134152448> - %2 = or disjoint <2 x i32> zeroinitializer, %1 - %3 = zext <2 x i32> %2 to <2 x i64> - %4 = shl nuw <2 x i64> %3, <i64 32, i64 32> - %5 = or disjoint <2 x i64> %4, zeroinitializer - store <2 x i64> %5, ptr poison, align 16 - ret void -} - -define void @foo(ptr %0) { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] -; CHECK-NEXT: vpxor 32(%rdi), %ymm0, %ymm1 -; CHECK-NEXT: vpxor (%rdi), %ymm0, %ymm0 -; CHECK-NEXT: vmovdqa %ymm0, (%rdi) -; CHECK-NEXT: vmovdqa %ymm1, 32(%rdi) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %1 = load <32 x half>, ptr %0 - %2 = fneg <32 x half> %1 - store <32 x half> %2, ptr %0 - ret void -} diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll index 9323cd5b1917..7462c7748282 100644 --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -938,3 +938,206 @@ define i128 @lshr_shl_mask(i128 %a0) { %2 = lshr i128 %1, 1 ret i128 %2 } + +define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) nounwind { +; i686-LABEL: shift_i128_limited_shamt: +; i686: # %bb.0: # %start +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: andl $-16, %esp +; i686-NEXT: subl $16, %esp +; i686-NEXT: movl 32(%ebp), %ebx +; i686-NEXT: movl 28(%ebp), %edi +; i686-NEXT: movzbl 40(%ebp), %ecx +; i686-NEXT: movb $6, %dl +; i686-NEXT: subb %cl, %dl +; i686-NEXT: addb $-7, %cl +; i686-NEXT: movl %edi, %eax +; i686-NEXT: shrl %eax +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: 
shll %cl, %ebx +; i686-NEXT: orl %eax, %ebx +; i686-NEXT: movl 24(%ebp), %esi +; i686-NEXT: movl %esi, %eax +; i686-NEXT: shll %cl, %eax +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl 8(%ebp), %edi +; i686-NEXT: movl 36(%ebp), %esi +; i686-NEXT: movl 32(%ebp), %edx +; i686-NEXT: shldl %cl, %edx, %esi +; i686-NEXT: movl %esi, 12(%edi) +; i686-NEXT: movl %ebx, 8(%edi) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 4(%edi) +; i686-NEXT: movl %eax, (%edi) +; i686-NEXT: movl %edi, %eax +; i686-NEXT: leal -12(%ebp), %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl $4 +; +; x86_64-LABEL: shift_i128_limited_shamt: +; x86_64: # %bb.0: # %start +; x86_64-NEXT: movq %rdi, %rax +; x86_64-NEXT: movb $6, %cl +; x86_64-NEXT: subb %dl, %cl +; x86_64-NEXT: shldq %cl, %rdi, %rsi +; x86_64-NEXT: shlq %cl, %rax +; x86_64-NEXT: movq %rsi, %rdx +; x86_64-NEXT: retq +start: + %shamt = sub nuw nsw i32 6, %b + %ext = zext nneg i32 %shamt to i128 + %res = shl i128 %a, %ext + ret i128 %res +} + +define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) nounwind { +; i686-LABEL: shift_i128_limited_shamt_no_nuw: +; i686: # %bb.0: # %start +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: andl $-16, %esp +; i686-NEXT: subl $48, %esp +; i686-NEXT: movzbl 40(%ebp), %eax +; i686-NEXT: movl 24(%ebp), %ecx +; i686-NEXT: movl 28(%ebp), %edx +; i686-NEXT: movl 32(%ebp), %esi +; i686-NEXT: movl 36(%ebp), %edi +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; i686-NEXT: movb $6, %cl +; i686-NEXT: subb %al, %cl +; i686-NEXT: movl %ecx, %eax +; i686-NEXT: shrb $3, %al +; i686-NEXT: andb 
$12, %al +; i686-NEXT: negb %al +; i686-NEXT: movsbl %al, %eax +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, (%esp) +; i686-NEXT: movl 20(%esp,%eax), %edx +; i686-NEXT: movl 24(%esp,%eax), %ebx +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: shldl %cl, %edx, %edi +; i686-NEXT: movl 16(%esp,%eax), %esi +; i686-NEXT: movl 28(%esp,%eax), %eax +; i686-NEXT: shldl %cl, %ebx, %eax +; i686-NEXT: movl 8(%ebp), %ebx +; i686-NEXT: movl %eax, 12(%ebx) +; i686-NEXT: movl %edi, 8(%ebx) +; i686-NEXT: movl %esi, %eax +; i686-NEXT: shll %cl, %eax +; i686-NEXT: shldl %cl, %esi, %edx +; i686-NEXT: movl %edx, 4(%ebx) +; i686-NEXT: movl %eax, (%ebx) +; i686-NEXT: movl %ebx, %eax +; i686-NEXT: leal -12(%ebp), %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl $4 +; +; x86_64-LABEL: shift_i128_limited_shamt_no_nuw: +; x86_64: # %bb.0: # %start +; x86_64-NEXT: movb $6, %cl +; x86_64-NEXT: subb %dl, %cl +; x86_64-NEXT: shldq %cl, %rdi, %rsi +; x86_64-NEXT: shlq %cl, %rdi +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: testb $64, %cl +; x86_64-NEXT: cmovneq %rdi, %rsi +; x86_64-NEXT: cmoveq %rdi, %rax +; x86_64-NEXT: movq %rsi, %rdx +; x86_64-NEXT: retq +start: + %shamt = sub nsw i32 6, %b + %ext = zext nneg i32 %shamt to i128 + %res = shl i128 %a, %ext + ret i128 %res +} + +define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) nounwind { +; i686-LABEL: shift_i128_limited_shamt_unknown_lhs: +; i686: # %bb.0: # %start +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: andl $-16, %esp +; i686-NEXT: subl $48, %esp +; i686-NEXT: movl 24(%ebp), %eax +; i686-NEXT: movl 28(%ebp), %edx +; i686-NEXT: movl 32(%ebp), %esi +; i686-NEXT: movl 36(%ebp), %edi +; i686-NEXT: movl 44(%ebp), %ecx +; i686-NEXT: 
subl 40(%ebp), %ecx +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, (%esp) +; i686-NEXT: movl %ecx, %eax +; i686-NEXT: shrb $3, %al +; i686-NEXT: andb $12, %al +; i686-NEXT: negb %al +; i686-NEXT: movsbl %al, %eax +; i686-NEXT: movl 20(%esp,%eax), %edx +; i686-NEXT: movl 24(%esp,%eax), %ebx +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: shldl %cl, %edx, %edi +; i686-NEXT: movl 16(%esp,%eax), %esi +; i686-NEXT: movl 28(%esp,%eax), %eax +; i686-NEXT: shldl %cl, %ebx, %eax +; i686-NEXT: movl 8(%ebp), %ebx +; i686-NEXT: movl %eax, 12(%ebx) +; i686-NEXT: movl %edi, 8(%ebx) +; i686-NEXT: movl %esi, %eax +; i686-NEXT: shll %cl, %eax +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; i686-NEXT: shldl %cl, %esi, %edx +; i686-NEXT: movl %edx, 4(%ebx) +; i686-NEXT: movl %eax, (%ebx) +; i686-NEXT: movl %ebx, %eax +; i686-NEXT: leal -12(%ebp), %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl $4 +; +; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs: +; x86_64: # %bb.0: # %start +; x86_64-NEXT: subl %edx, %ecx +; x86_64-NEXT: shldq %cl, %rdi, %rsi +; x86_64-NEXT: shlq %cl, %rdi +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: testb $64, %cl +; x86_64-NEXT: cmovneq %rdi, %rsi +; x86_64-NEXT: cmoveq %rdi, %rax +; x86_64-NEXT: movq %rsi, %rdx +; x86_64-NEXT: retq +start: + %shamt = sub nuw nsw i32 %c, %b + %ext = zext nneg i32 %shamt to i128 + %res = shl i128 %a, %ext + ret i128 %res +} diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll index 825a11d66cd4..8d99ad07e22e 100644 --- a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll +++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll @@ -1,6 +1,6 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s -; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2 | FileCheck %s define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: test_int_x86_vsm4key4128: diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll b/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll index 4b0f63f9a638..cd576b19f876 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-avxvnni.ll @@ -8,10 +8,10 @@ declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>) declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>) declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>) declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>) -declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) -declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) -declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) -declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <32 x i8>, <32 x i8>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <16 x i8>, <16 x i8>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <32 x i8>, <32 x i8>) define <4 x i32> @stack_fold_vpdpwssd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) 
{ ; CHECK-LABEL: stack_fold_vpdpwssd: @@ -125,7 +125,7 @@ define <8 x i32> @stack_fold_vpdpwssds_256_commuted(<8 x i32> %a0, <8 x i32> %a1 ret <8 x i32> %2 } -define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusd: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -135,11 +135,11 @@ define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a ; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) ret <4 x i32> %2 } -define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusd_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -150,11 +150,11 @@ define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 ; CHECK-NEXT: {vex} vpdpbusd %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <16 x i8> %a2, <16 x i8> %a1) ret <4 x i32> %2 } -define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x 
i32> %a1, <8 x i32> %a2) { +define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -164,11 +164,11 @@ define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32 ; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) ret <8 x i32> %2 } -define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusd_256_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -179,11 +179,11 @@ define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, ; CHECK-NEXT: {vex} vpdpbusd %ymm1, %ymm2, %ymm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <32 x i8> %a2, <32 x i8> %a1) ret <8 x i32> %2 } -define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusds: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps %xmm2, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -193,11 +193,11 @@ define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> % ; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) ret <4 x i32> %2 } -define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusds_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -208,11 +208,11 @@ define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 ; CHECK-NEXT: {vex} vpdpbusds %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <16 x i8> %a2, <16 x i8> %a1) ret <4 x i32> %2 } -define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusds_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -222,11 +222,11 @@ define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i3 ; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), 
%ymm1, %ymm0 # 32-byte Folded Reload ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) ret <8 x i32> %2 } -define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <32 x i8> %a1, <32 x i8> %a2) { ; CHECK-LABEL: stack_fold_vpdpbusds_256_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -237,6 +237,6 @@ define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1 ; CHECK-NEXT: {vex} vpdpbusds %ymm1, %ymm2, %ymm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <32 x i8> %a2, <32 x i8> %a1) ret <8 x i32> %2 } diff --git a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll index 35688e59fc9f..766ccdbada53 100644 --- a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll +++ b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll @@ -79,7 +79,7 @@ define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; CHECK-NEXT: ret{{[l|q]}} %ext = extractelement <2 x double> %a0, i32 0 %cvt = call half 
@llvm.experimental.constrained.fptrunc.f16.f64(double %ext, @@ -140,7 +140,7 @@ define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 { ; CHECK-LABEL: f17: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vmovsh {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; CHECK-NEXT: ret{{[l|q]}} %ext = extractelement <4 x float> %a0, i32 0 %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext, diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 6b8a03ba5eb7..762900e0bb18 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -9,8 +9,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 6fbc10307e0b..0b98a9388adc 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -6,9 +6,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2 @@ -118,10 +117,10 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: var_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -273,10 +272,10 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: 
var_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -426,10 +425,10 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: var_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -680,34 +679,6 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX512VLVBMI2-LABEL: var_funnnel_v32i8: -; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] -; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3 -; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLVBMI2-NEXT: vpsllvw %zmm0, %zmm3, %zmm0 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm0, 
%zmm0 -; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512VLVBMI2-NEXT: retq -; -; AVX10_256-LABEL: var_funnnel_v32i8: -; AVX10_256: # %bb.0: -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 -; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31] -; AVX10_256-NEXT: vpsllvw %ymm5, %ymm3, %ymm3 -; AVX10_256-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23] -; AVX10_256-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 -; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX10_256-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 -; AVX10_256-NEXT: retq -; ; XOPAVX1-LABEL: var_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 @@ -840,11 +811,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 
x i64> %y, <4 x i64> % ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -957,11 +928,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VLBW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm3[1,3],ymm0[5,7],ymm3[5,7] ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -1078,11 +1049,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1212,17 +1183,17 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v32i8: -; 
AVX10: # %bb.0: -; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX10-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX10-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10-NEXT: vpsllw %xmm2, %ymm0, %ymm0 -; AVX10-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX10-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] +; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm3, %ymm3 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm3, %ymm3 +; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] +; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpackuswb %ymm3, %ymm0, 
%ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v32i8: ; XOPAVX1: # %bb.0: @@ -1452,25 +1423,25 @@ define void @fancierRotate2(ptr %arr, ptr %control, i32 %rot0, i32 %rot1) { ; AVX512VLBW-NEXT: vzeroupper ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: fancierRotate2: -; AVX10: # %bb.0: # %entry -; AVX10-NEXT: vpbroadcastd %edx, %ymm0 -; AVX10-NEXT: vpbroadcastd %ecx, %ymm1 -; AVX10-NEXT: movq $-1024, %rax # imm = 0xFC00 -; AVX10-NEXT: .p2align 4 -; AVX10-NEXT: .LBB8_1: # %loop -; AVX10-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX10-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX10-NEXT: vptestnmb %xmm2, %xmm2, %k1 -; AVX10-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1} -; AVX10-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3 -; AVX10-NEXT: vprolvd %ymm2, %ymm3, %ymm2 -; AVX10-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4) -; AVX10-NEXT: addq $8, %rax -; AVX10-NEXT: jne .LBB8_1 -; AVX10-NEXT: # %bb.2: # %exit -; AVX10-NEXT: vzeroupper -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: fancierRotate2: +; AVX512VLVBMI2: # %bb.0: # %entry +; AVX512VLVBMI2-NEXT: vpbroadcastd %edx, %ymm0 +; AVX512VLVBMI2-NEXT: vpbroadcastd %ecx, %ymm1 +; AVX512VLVBMI2-NEXT: movq $-1024, %rax # imm = 0xFC00 +; AVX512VLVBMI2-NEXT: .p2align 4 +; AVX512VLVBMI2-NEXT: .LBB8_1: # %loop +; AVX512VLVBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; AVX512VLVBMI2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512VLVBMI2-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; AVX512VLVBMI2-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1} +; AVX512VLVBMI2-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3 +; AVX512VLVBMI2-NEXT: vprolvd %ymm2, %ymm3, %ymm2 +; AVX512VLVBMI2-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4) +; AVX512VLVBMI2-NEXT: addq $8, %rax +; AVX512VLVBMI2-NEXT: jne .LBB8_1 +; AVX512VLVBMI2-NEXT: # %bb.2: # %exit +; AVX512VLVBMI2-NEXT: vzeroupper +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: fancierRotate2: ; XOPAVX1: # %bb.0: # %entry @@ -1623,10 +1594,10 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { 
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -1721,10 +1692,10 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -1824,10 +1795,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1947,28 +1918,6 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8: -; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VLVBMI2-NEXT: vmovdqa64 
{{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] -; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 -; AVX512VLVBMI2-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512VLVBMI2-NEXT: retq -; -; AVX10_256-LABEL: constant_funnnel_v32i8: -; AVX10_256: # %bb.0: -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX10_256-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX10_256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX10_256-NEXT: retq -; ; XOPAVX1-LABEL: constant_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 @@ -2069,10 +2018,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldq 
$14, %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -2154,10 +2103,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -2239,10 +2188,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -2330,12 +2279,12 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v32i8: -; AVX10: # %bb.0: -; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: 
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index bf525442a419..20be5791309f 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -9,8 +9,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index b0a1a91bdccc..1f164635910c 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -6,9 +6,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 -; RUN: llc < 
%s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s --check-prefixes=AVX512VLVBMI2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2 @@ -118,11 +117,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: var_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -274,11 +273,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: var_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -454,11 +453,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; 
AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: var_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: var_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -720,20 +719,6 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; -; AVX10_256-LABEL: var_funnnel_v32i8: -; AVX10_256: # %bb.0: -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 -; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31] -; AVX10_256-NEXT: vpsrlvw %ymm5, %ymm3, %ymm3 -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = 
ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23] -; AVX10_256-NEXT: vpsrlvw %ymm1, %ymm0, %ymm1 -; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] -; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0 -; AVX10_256-NEXT: retq -; ; XOPAVX1-LABEL: var_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] @@ -870,12 +855,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> % ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX10-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -988,12 +973,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % ; AVX512VLBW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6] ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastd %xmm2, %ymm2 -; AVX10-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa 
%ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -1110,12 +1095,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatvar_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpbroadcastw %xmm2, %ymm2 -; AVX10-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2 +; AVX512VLVBMI2-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1265,17 +1250,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VLVBMI2-NEXT: retq ; -; AVX10_256-LABEL: splatvar_funnnel_v32i8: -; AVX10_256: # %bb.0: -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1 -; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = 
[0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] -; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0 -; AVX10_256-NEXT: retq -; ; XOPAVX1-LABEL: splatvar_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 @@ -1388,11 +1362,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -1487,11 +1461,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -1591,11 +1565,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: constant_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; AVX10-NEXT: vmovdqa %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16: +; AVX512VLVBMI2: # 
%bb.0: +; AVX512VLVBMI2-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512VLVBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1761,16 +1735,6 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; -; AVX10_256-LABEL: constant_funnnel_v32i8: -; AVX10_256: # %bb.0: -; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 -; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] -; AVX10_256-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 -; AVX10_256-NEXT: retq -; ; XOPAVX1-LABEL: constant_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 @@ -1869,10 +1833,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v4i64: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdq $14, %ymm0, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdq $14, 
%ymm0, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v4i64: ; XOPAVX1: # %bb.0: @@ -1954,10 +1918,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v8i32: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdd $4, %ymm0, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdd $4, %ymm0, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v8i32: ; XOPAVX1: # %bb.0: @@ -2039,10 +2003,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v16i16: -; AVX10: # %bb.0: -; AVX10-NEXT: vpshrdw $7, %ymm0, %ymm1, %ymm0 -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -2130,12 +2094,12 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) ; AVX512VLBW-NEXT: retq ; -; AVX10-LABEL: splatconstant_funnnel_v32i8: -; AVX10: # %bb.0: -; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) -; AVX10-NEXT: retq +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (m32bcst & (ymm0 ^ ymm2)) +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: ; XOPAVX1: # %bb.0: diff 
--git a/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll new file mode 100644 index 000000000000..31949403b446 --- /dev/null +++ b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s +; DEBUG-OUTPUT-NOT: .loc +; DEBUG-OUTPUT-NOT: {{.*}}.debug_info + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @foo(i32 %n) #0 !dbg !4 { +entry: + %diff = alloca i32, align 4 + %cb = alloca [16 x i8], align 16 + %cc = alloca [16 x i8], align 16 + store i32 0, ptr %diff, align 4, !tbaa !11 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1, !tbaa !21 + %conv = sext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1, !tbaa !21 + %conv3 = sext i8 %1 to i32 + %sub = sub i32 %conv, %conv3 + %add = add nsw i32 %sub, %add8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25 + +for.end: ; preds = %for.body + store i32 %add, ptr %diff, align 4, !tbaa !11 + call void @ibar(ptr %diff) #2 + ret i32 0 +} + +declare void @ibar(ptr) #1 + +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} +!llvm.dbg.cu = !{!24} + +!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: 
"vectorization-remarks.c", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 1, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.5.0 "} +!10 = !DILocation(line: 8, column: 3, scope: !4) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 17, column: 8, scope: !16) +!16 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !17) +!17 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !18) +!18 = distinct !DILexicalBlock(line: 17, column: 3, file: !1, scope: !4) +!19 = !DILocation(line: 18, column: 5, scope: !20) +!20 = distinct !DILexicalBlock(line: 17, column: 27, file: !1, scope: !18) +!21 = !{!13, !13, i64 0} +!22 = !DILocation(line: 20, column: 3, scope: !4) +!23 = !DILocation(line: 21, column: 3, scope: !4) +!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug) +!25 = !{!25, !15} diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir index de76d90bf6b6..474b77665867 100644 --- a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir +++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir @@ -106,7 +106,7 @@ body: | # RUN: -x86-wineh-unwindv2-force-mode=1 | FileCheck %s \ # RUN: --check-prefix=BESTEFFORT # DEALLOC-AFTER-EPILOG: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'dealloc_after_epilog': -# DEALLOC-AFTER-EPILOG-SAME: Unexpected lea, mov or add instruction after the epilog +# DEALLOC-AFTER-EPILOG-SAME: Unexpected lea or add instruction after the epilog --- | define dso_local void @dealloc_after_epilog() local_unnamed_addr { @@ -161,6 +161,135 @@ body: | RET64 ... 
+;--- mov_no_setframe.mir +# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/mov_no_setframe.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=MOV-NO-SETFRAME +# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/mov_no_setframe.mir \ +# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \ +# RUN: FileCheck %s --check-prefix=BESTEFFORT +# MOV-NO-SETFRAME: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_no_setframe': +# MOV-NO-SETFRAME-SAME: The epilog is setting frame back, but prolog did not set it + +--- | + define dso_local void @mov_no_setframe() local_unnamed_addr { + entry: + ret void + } + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: mov_no_setframe +body: | + bb.0.entry: + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + $rsp = MOV64rr $rbp + SEH_EndEpilogue + RET64 +... + +;--- mov_after_epilog.mir +# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/mov_after_epilog.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=MOV-AFTER-EPILOG +# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/mov_after_epilog.mir -run-pass=x86-wineh-unwindv2 \ +# RUN: -x86-wineh-unwindv2-force-mode=1 | FileCheck %s \ +# RUN: --check-prefix=BESTEFFORT +# MOV-AFTER-EPILOG: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_after_epilog': +# MOV-AFTER-EPILOG-SAME: Unexpected mov instruction after the epilog + +--- | + define dso_local void @mov_after_epilog() local_unnamed_addr { + entry: + ret void + } + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: mov_after_epilog +body: | + bb.0.entry: + $rbp = MOV64rr $rsp + frame-setup SEH_SetFrame 52, 0 + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + SEH_EndEpilogue + $rsp = MOV64rr $rbp + RET64 +... 
+ +;--- pop_before_mov.mir +# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/pop_before_mov.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=POP-BEFORE-MOV +# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/pop_before_mov.mir \ +# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \ +# RUN: FileCheck %s --check-prefix=BESTEFFORT +# POP-BEFORE-MOV: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'pop_before_mov': +# POP-BEFORE-MOV-SAME: The epilog is setting the frame back after popping registers + +--- | + define dso_local void @pop_before_mov() local_unnamed_addr { + entry: + ret void + } + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: pop_before_mov +body: | + bb.0.entry: + frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 55 + $rbp = MOV64rr $rsp + frame-setup SEH_SetFrame 52, 0 + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rsp = MOV64rr $rbp + SEH_EndEpilogue + RET64 +... 
+ +;--- mov_after_dealloc.mir +# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/mov_after_dealloc.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=MOV-AFTER-DEALLOC +# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/mov_after_dealloc.mir \ +# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \ +# RUN: FileCheck %s --check-prefix=BESTEFFORT +# MOV-AFTER-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'mov_after_dealloc': +# MOV-AFTER-DEALLOC-SAME: Cannot set the frame back after the stack allocation has been deallocated + +--- | + define dso_local void @mov_after_dealloc() local_unnamed_addr { + entry: + ret void + } + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: mov_after_dealloc +body: | + bb.0.entry: + $rbp = MOV64rr $rsp + frame-setup SEH_SetFrame 52, 0 + $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags + frame-setup SEH_StackAlloc 40 + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags + $rsp = MOV64rr $rbp + SEH_EndEpilogue + RET64 +... 
+ ;--- too_many_pops.mir # RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - %t/too_many_pops.mir \ # RUN: -run-pass=x86-wineh-unwindv2 2>&1 | FileCheck %s \ diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll b/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll index 326127a919f3..0d92d044e1b9 100644 --- a/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll +++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2.ll @@ -171,9 +171,44 @@ define dso_local void @large_aligned_alloc() align 16 { ; CHECK-NEXT: retq ; CHECK-NEXT: .seh_endproc +define dso_local void @set_frame_only() local_unnamed_addr { + tail call i64 @llvm.x86.flags.read.u64() + ret void +} + +; CHECK-LABEL: set_frame_only: +; CHECK: .seh_unwindversion 2 +; CHECK: .seh_pushreg %rbp +; CHECK: .seh_setframe %rbp, 0 +; CHECK: .seh_endprologue +; CHECK-NOT: .seh_endproc +; CHECK: .seh_startepilogue +; CHECK-NEXT: .seh_unwindv2start +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: retq +; CHECK-NEXT: .seh_endproc + +attributes #1 = { noreturn } +define dso_local void @no_return_func() local_unnamed_addr #1 { +entry: + call void @d() + unreachable +} +; CHECK-LABEL: no_return_func: +; CHECK-NOT: .seh_unwindversion 2 +; CHECK: .seh_stackalloc +; CHECK-NEXT: .seh_endprologue +; CHECK-NOT: .seh_startepilogue +; CHECK-NOT: .seh_unwindv2start +; CHECK: int3 +; CHECK-NEXT: .seh_endproc + +declare i64 @llvm.x86.flags.read.u64() declare void @a() local_unnamed_addr declare i32 @b() local_unnamed_addr declare i32 @c(i32) local_unnamed_addr +declare void @d() local_unnamed_addr #1 !llvm.module.flags = !{!0} -!0 = !{i32 1, !"winx64-eh-unwindv2", i32 1} +!0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} diff --git a/llvm/test/CodeGen/X86/xor-not-combine.ll b/llvm/test/CodeGen/X86/xor-not-combine.ll new file mode 100644 index 000000000000..af65ade35ce8 --- /dev/null +++ b/llvm/test/CodeGen/X86/xor-not-combine.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
< %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; Test for DAG combine: fold (not (sub Y, X)) -> (add X, ~Y) +; when Y is a constant. + +; Test case 1: Y is a constant - should transform to (add X, ~Y) +define i32 @test_not_sub_constant(i32 %x) { +; CHECK-LABEL: test_not_sub_constant: +; CHECK: # %bb.0: +; CHECK: leal -101(%rdi), %eax +; CHECK-NEXT: retq + %sub = sub i32 100, %x + %not = xor i32 %sub, -1 + ret i32 %not +} + +; Test case 2: Y is not a constant - should NOT optimize +define i32 @test_not_sub_non_constant(i32 %x, i32 %y) { +; CHECK-LABEL: test_not_sub_non_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: retq + %sub = sub i32 %y, %x + %not = xor i32 %sub, -1 + ret i32 %not +} |
