summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/abds-neg.ll410
-rw-r--r--llvm/test/CodeGen/X86/abds.ll390
-rw-r--r--llvm/test/CodeGen/X86/abdu-neg.ll282
-rw-r--r--llvm/test/CodeGen/X86/abdu.ll225
-rw-r--r--llvm/test/CodeGen/X86/abs.ll55
-rw-r--r--llvm/test/CodeGen/X86/add-sub-bool.ll25
-rw-r--r--llvm/test/CodeGen/X86/arg-copy-elide.ll8
-rw-r--r--llvm/test/CodeGen/X86/avx512fp16-cvt.ll42
-rw-r--r--llvm/test/CodeGen/X86/bitselect.ll55
-rw-r--r--llvm/test/CodeGen/X86/bsf.ll144
-rw-r--r--llvm/test/CodeGen/X86/bsr.ll158
-rw-r--r--llvm/test/CodeGen/X86/bswap-wide-int.ll30
-rw-r--r--llvm/test/CodeGen/X86/constructor.ll13
-rw-r--r--llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll36
-rw-r--r--llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll421
-rw-r--r--llvm/test/CodeGen/X86/dollar-name-asm.ll7
-rw-r--r--llvm/test/CodeGen/X86/dollar-name.ll79
-rw-r--r--llvm/test/CodeGen/X86/exp10-libcall.ll12
-rw-r--r--llvm/test/CodeGen/X86/fast-isel-x32.ll1
-rw-r--r--llvm/test/CodeGen/X86/fp128-cast-strict.ll92
-rw-r--r--llvm/test/CodeGen/X86/fp128-cast.ll125
-rw-r--r--llvm/test/CodeGen/X86/fp128-libcalls-strict.ll2060
-rw-r--r--llvm/test/CodeGen/X86/fp128-libcalls.ll1773
-rw-r--r--llvm/test/CodeGen/X86/frameaddr.ll11
-rw-r--r--llvm/test/CodeGen/X86/freeze-unary.ll80
-rw-r--r--llvm/test/CodeGen/X86/fshl.ll185
-rw-r--r--llvm/test/CodeGen/X86/fshr.ll170
-rw-r--r--llvm/test/CodeGen/X86/funnel-shift.ll74
-rw-r--r--llvm/test/CodeGen/X86/i128-add.ll23
-rw-r--r--llvm/test/CodeGen/X86/i128-fp128-abi.ll706
-rw-r--r--llvm/test/CodeGen/X86/i128-sdiv.ll61
-rw-r--r--llvm/test/CodeGen/X86/i128-udiv.ll12
-rw-r--r--llvm/test/CodeGen/X86/iabs.ll43
-rw-r--r--llvm/test/CodeGen/X86/icmp-shift-opt.ll102
-rw-r--r--llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll4
-rw-r--r--llvm/test/CodeGen/X86/invalid-operand-bundle-callbr.ll2
-rw-r--r--llvm/test/CodeGen/X86/invalid-operand-bundle-invoke.ll2
-rw-r--r--llvm/test/CodeGen/X86/kcfi.ll23
-rw-r--r--llvm/test/CodeGen/X86/ldexp-f80.ll8
-rw-r--r--llvm/test/CodeGen/X86/ldexp-libcall.ll8
-rw-r--r--llvm/test/CodeGen/X86/ldexp-not-readonly.ll8
-rw-r--r--llvm/test/CodeGen/X86/ldexp-strict.ll14
-rw-r--r--llvm/test/CodeGen/X86/ldexp-wrong-signature.ll8
-rw-r--r--llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll8
-rw-r--r--llvm/test/CodeGen/X86/ldexp.ll80
-rw-r--r--llvm/test/CodeGen/X86/lea-2.ll1
-rw-r--r--llvm/test/CodeGen/X86/lea-3.ll1
-rw-r--r--llvm/test/CodeGen/X86/lea-4.ll1
-rw-r--r--llvm/test/CodeGen/X86/lea-5.ll1
-rw-r--r--llvm/test/CodeGen/X86/lea.ll1
-rw-r--r--llvm/test/CodeGen/X86/llvm.frexp.f80.ll16
-rw-r--r--llvm/test/CodeGen/X86/llvm.frexp.ll66
-rw-r--r--llvm/test/CodeGen/X86/mul128.ll97
-rw-r--r--llvm/test/CodeGen/X86/neg-abs.ll55
-rw-r--r--llvm/test/CodeGen/X86/pcsections-atomics.ll3378
-rw-r--r--llvm/test/CodeGen/X86/popcnt.ll485
-rw-r--r--llvm/test/CodeGen/X86/pr46004.ll19
-rw-r--r--llvm/test/CodeGen/X86/scalar-fp-to-i32.ll76
-rw-r--r--llvm/test/CodeGen/X86/scalar-fp-to-i64.ll76
-rw-r--r--llvm/test/CodeGen/X86/scmp.ll39
-rw-r--r--llvm/test/CodeGen/X86/sdiv_fix.ll99
-rw-r--r--llvm/test/CodeGen/X86/sdiv_fix_sat.ll440
-rw-r--r--llvm/test/CodeGen/X86/shift-combine.ll14
-rw-r--r--llvm/test/CodeGen/X86/shift-i128.ll72
-rw-r--r--llvm/test/CodeGen/X86/sincos-stack-args.ll4
-rw-r--r--llvm/test/CodeGen/X86/sincos.ll18
-rw-r--r--llvm/test/CodeGen/X86/smax.ll78
-rw-r--r--llvm/test/CodeGen/X86/smin.ll81
-rw-r--r--llvm/test/CodeGen/X86/stack-align2.ll5
-rw-r--r--llvm/test/CodeGen/X86/stack-protector-target-openbsd.ll81
-rw-r--r--llvm/test/CodeGen/X86/test-shrink-bug.ll19
-rw-r--r--llvm/test/CodeGen/X86/ucmp.ll34
-rw-r--r--llvm/test/CodeGen/X86/udiv_fix.ll28
-rw-r--r--llvm/test/CodeGen/X86/udiv_fix_sat.ll28
-rw-r--r--llvm/test/CodeGen/X86/umax.ll135
-rw-r--r--llvm/test/CodeGen/X86/umin.ll81
-rw-r--r--llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll6
-rw-r--r--llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir1
-rw-r--r--llvm/test/CodeGen/X86/wide-integer-cmp.ll14
-rw-r--r--llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll113
-rw-r--r--llvm/test/CodeGen/X86/x86-64-baseptr.ll8
-rw-r--r--llvm/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll9
82 files changed, 9487 insertions, 4268 deletions
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index f6d66ab47ce0..2911edfbfd40 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -367,44 +367,49 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -438,44 +443,49 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -639,55 +649,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovll %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovll 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovll 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovll 36(%ebp), %ebx
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -848,37 +862,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovgel (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovgel %ebx, %esi
-; X86-NEXT: cmovgel %ebp, %ecx
-; X86-NEXT: cmovgel %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %ebx
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovgel %edi, %esi
+; X86-NEXT: cmovgel %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1118,35 +1136,39 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %esi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1175,35 +1197,39 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %esi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 0356c2702a41..a1a4ba81ae49 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -343,37 +343,41 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -404,37 +408,41 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -585,37 +593,41 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -768,37 +780,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1027,35 +1043,38 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_subnsw_i128:
@@ -1079,35 +1098,38 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_subnsw_i128_undef:
@@ -1282,37 +1304,41 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_select_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index 6bda99c89a37..b7c34070f1af 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -355,39 +355,43 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -423,39 +427,43 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -621,55 +629,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovbl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovbl 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovbl %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovbl 36(%ebp), %ebx
+; X86-NEXT: cmovbl 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: cmovbl 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -827,39 +839,43 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 27acec32fd34..043c9155f52f 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -326,35 +326,38 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128:
@@ -381,35 +384,38 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_ext_i128_undef:
@@ -548,35 +554,38 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_minmax_i128:
@@ -717,35 +726,38 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_cmp_i128:
@@ -887,35 +899,38 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_select_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_select_i128:
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index bae140abdf6b..e252d5953e60 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -144,31 +144,34 @@ define i128 @test_i128(i128 %a) nounwind {
;
; X86-LABEL: test_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%r = call i128 @llvm.abs.i128(i128 %a, i1 false)
ret i128 %r
@@ -688,13 +691,17 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind {
;
; X86-LABEL: test_sextinreg_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 24(%ebp), %esi
; X86-NEXT: xorl %edx, %esi
; X86-NEXT: subl %edx, %esi
; X86-NEXT: sbbl %edx, %ecx
@@ -702,7 +709,9 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind {
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
+; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%shl = shl i128 %a, 64
%ashr = ashr exact i128 %shl, 64
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index c2bfcf57185e..1df284fb9fe2 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -104,18 +104,21 @@ define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-LABEL: test_i128_add_add_idx:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: btl $5, {{[0-9]+}}(%esp)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: addl 24(%ebp), %esi
+; X86-NEXT: adcl 28(%ebp), %edi
+; X86-NEXT: adcl 32(%ebp), %ecx
+; X86-NEXT: adcl 36(%ebp), %edx
+; X86-NEXT: btl $5, 64(%ebp)
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ecx
@@ -124,8 +127,10 @@ define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test_i128_add_add_idx:
diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 0eb2c630e681..f13627b55856 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -188,11 +188,11 @@ define void @split_i128(ptr %sret, i128 %x) {
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $48, %esp
-; CHECK-NEXT: movl 12(%ebp), %eax
+; CHECK-NEXT: movl 24(%ebp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl 16(%ebp), %ebx
-; CHECK-NEXT: movl 20(%ebp), %esi
-; CHECK-NEXT: movl 24(%ebp), %edi
+; CHECK-NEXT: movl 28(%ebp), %ebx
+; CHECK-NEXT: movl 32(%ebp), %esi
+; CHECK-NEXT: movl 36(%ebp), %edi
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index f66f0c0ceabc..cc58bc1e44f3 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -628,13 +628,19 @@ define half @s128_to_half(i128 %x) {
;
; X86-LABEL: s128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __floattihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
%a = sitofp i128 %x to half
ret half %a
@@ -713,13 +719,19 @@ define half @u128_to_half(i128 %x) {
;
; X86-LABEL: u128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __floatuntihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
%a = uitofp i128 %x to half
ret half %a
@@ -1020,11 +1032,15 @@ define half @f128_to_half(fp128 %x) nounwind {
;
; X86-LABEL: f128_to_half:
; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
; X86-NEXT: vmovups %xmm0, (%esp)
; X86-NEXT: calll __trunctfhf2
-; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
%a = fptrunc fp128 %x to half
ret half %a
diff --git a/llvm/test/CodeGen/X86/bitselect.ll b/llvm/test/CodeGen/X86/bitselect.ll
index 4fc0827ac4dd..33381313d3c1 100644
--- a/llvm/test/CodeGen/X86/bitselect.ll
+++ b/llvm/test/CodeGen/X86/bitselect.ll
@@ -146,37 +146,40 @@ define i64 @bitselect_i64(i64 %a, i64 %b, i64 %m) nounwind {
define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
; X86-LABEL: bitselect_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: andl 56(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: andl 60(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: andl 64(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: andl 68(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-NOBMI-LABEL: bitselect_i128:
diff --git a/llvm/test/CodeGen/X86/bsf.ll b/llvm/test/CodeGen/X86/bsf.ll
index 312f94c04123..143e10e6909e 100644
--- a/llvm/test/CodeGen/X86/bsf.ll
+++ b/llvm/test/CodeGen/X86/bsf.ll
@@ -263,70 +263,78 @@ define i128 @cmov_bsf128(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsf128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %eax, %edx
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: jne .LBB8_3
; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: rep bsfl %edi, %esi
-; X86-NEXT: addl $32, %esi
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB8_7
-; X86-NEXT: .LBB8_6:
-; X86-NEXT: rep bsfl %eax, %edx
-; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: rep bsfl %esi, %eax
+; X86-NEXT: addl $32, %eax
+; X86-NEXT: jmp .LBB8_5
; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: xorl %ebp, %ebp
-; X86-NEXT: movl $128, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: movl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: jmp .LBB8_11
; X86-NEXT: .LBB8_3:
-; X86-NEXT: rep bsfl %ecx, %esi
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: rep bsfl %ecx, %eax
+; X86-NEXT: .LBB8_5: # %cond.false
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB8_6
-; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: rep bsfl %ebp, %edx
+; X86-NEXT: # %bb.7: # %cond.false
+; X86-NEXT: rep bsfl %ebx, %edx
; X86-NEXT: addl $32, %edx
+; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: .LBB8_6:
+; X86-NEXT: rep bsfl %edi, %edx
; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB8_10
; X86-NEXT: # %bb.9: # %cond.false
; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: xorl %ebp, %ebp
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: jne .LBB8_13
-; X86-NEXT: # %bb.12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: .LBB8_13: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: je .LBB8_12
+; X86-NEXT: # %bb.13: # %cond.end
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB8_14
+; X86-NEXT: .LBB8_12:
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: .LBB8_14: # %cond.end
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -361,46 +369,49 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsf128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: orl %ebx, %ebp
-; X86-NEXT: orl %edi, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: je .LBB9_11
; X86-NEXT: # %bb.1: # %select.true.sink
; X86-NEXT: testl %edx, %edx
; X86-NEXT: jne .LBB9_2
; X86-NEXT: # %bb.3: # %select.true.sink
-; X86-NEXT: rep bsfl %ecx, %edi
-; X86-NEXT: addl $32, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %ecx, %ebx
+; X86-NEXT: addl $32, %ebx
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: je .LBB9_6
; X86-NEXT: .LBB9_5:
-; X86-NEXT: rep bsfl %ebx, %esi
+; X86-NEXT: rep bsfl %edi, %esi
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: je .LBB9_8
; X86-NEXT: jmp .LBB9_9
; X86-NEXT: .LBB9_11: # %select.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: jmp .LBB9_10
; X86-NEXT: .LBB9_2:
-; X86-NEXT: rep bsfl %edx, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %edx, %ebx
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB9_5
; X86-NEXT: .LBB9_6: # %select.true.sink
; X86-NEXT: rep bsfl %esi, %esi
@@ -409,13 +420,14 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
; X86-NEXT: jne .LBB9_9
; X86-NEXT: .LBB9_8: # %select.true.sink
; X86-NEXT: addl $64, %esi
-; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %esi, %ebx
; X86-NEXT: .LBB9_9: # %select.true.sink
-; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ebx, (%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: .LBB9_10: # %select.true.sink
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bsr.ll b/llvm/test/CodeGen/X86/bsr.ll
index fbca4af425ea..ab0478a4e944 100644
--- a/llvm/test/CodeGen/X86/bsr.ll
+++ b/llvm/test/CodeGen/X86/bsr.ll
@@ -291,79 +291,80 @@ define i128 @cmov_bsr128(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsr128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %ebx, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %eax, %edx
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: testl %ebp, %ebp
+; X86-NEXT: testl %esi, %esi
; X86-NEXT: jne .LBB8_3
; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: bsrl %ebx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: orl $32, %edx
+; X86-NEXT: bsrl %ebx, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: je .LBB8_7
; X86-NEXT: .LBB8_6:
-; X86-NEXT: bsrl %edi, %esi
-; X86-NEXT: xorl $31, %esi
+; X86-NEXT: bsrl %edi, %eax
+; X86-NEXT: xorl $31, %eax
; X86-NEXT: jmp .LBB8_8
; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
-; X86-NEXT: movl $128, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl $128, %esi
; X86-NEXT: jmp .LBB8_11
; X86-NEXT: .LBB8_3:
-; X86-NEXT: bsrl %ebp, %edx
-; X86-NEXT: xorl $31, %edx
+; X86-NEXT: bsrl %esi, %esi
+; X86-NEXT: xorl $31, %esi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB8_6
; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: bsrl %ecx, %esi
-; X86-NEXT: xorl $31, %esi
-; X86-NEXT: orl $32, %esi
+; X86-NEXT: bsrl %ecx, %eax
+; X86-NEXT: xorl $31, %eax
+; X86-NEXT: orl $32, %eax
; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: orl %ebp, %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: orl 36(%ebp), %edx
; X86-NEXT: jne .LBB8_10
; X86-NEXT: # %bb.9: # %cond.false
-; X86-NEXT: orl $64, %esi
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: orl %ebp, %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl 36(%ebp), %edi
; X86-NEXT: orl %ecx, %edi
; X86-NEXT: je .LBB8_12
; X86-NEXT: # %bb.13: # %cond.end
-; X86-NEXT: xorl $127, %edx
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: jmp .LBB8_14
; X86-NEXT: .LBB8_12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: movl 48(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %esi
; X86-NEXT: .LBB8_14: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -398,62 +399,67 @@ define i128 @cmov_bsr128_undef(i128 %x, i128 %y) nounwind {
; X86-LABEL: cmov_bsr128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2:
-; X86-NEXT: bsrl %esi, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
+; X86-NEXT: bsrl %edi, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1:
-; X86-NEXT: bsrl %edi, %ecx
-; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: bsrl %eax, %esi
+; X86-NEXT: xorl $31, %esi
; X86-NEXT: .LBB9_3:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
; X86-NEXT: testl %edx, %edx
; X86-NEXT: jne .LBB9_4
; X86-NEXT: # %bb.5:
-; X86-NEXT: bsrl %ebx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: orl $32, %ebp
-; X86-NEXT: jmp .LBB9_6
+; X86-NEXT: bsrl %ebx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl $32, %ecx
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: je .LBB9_7
+; X86-NEXT: jmp .LBB9_8
; X86-NEXT: .LBB9_4:
-; X86-NEXT: bsrl %edx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: .LBB9_6:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %edi, %esi
+; X86-NEXT: bsrl %edx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl %eax, %edi
; X86-NEXT: jne .LBB9_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: orl $64, %ebp
-; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: .LBB9_7:
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: movl %ecx, %esi
; X86-NEXT: .LBB9_8:
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl 32(%ebp), %ebx
; X86-NEXT: orl %edx, %ebx
; X86-NEXT: jne .LBB9_9
; X86-NEXT: # %bb.10:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %ecx
; X86-NEXT: jmp .LBB9_11
; X86-NEXT: .LBB9_9:
-; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: .LBB9_11:
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll
index 6d5e995a6d57..673b7f16de75 100644
--- a/llvm/test/CodeGen/X86/bswap-wide-int.ll
+++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll
@@ -41,13 +41,16 @@ define i64 @bswap_i64(i64 %a0) nounwind {
define i128 @bswap_i128(i128 %a0) nounwind {
; X86-LABEL: bswap_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: bswapl %edi
; X86-NEXT: bswapl %esi
; X86-NEXT: bswapl %edx
@@ -56,25 +59,32 @@ define i128 @bswap_i128(i128 %a0) nounwind {
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X86-MOVBE-LABEL: bswap_i128:
; X86-MOVBE: # %bb.0:
+; X86-MOVBE-NEXT: pushl %ebp
+; X86-MOVBE-NEXT: movl %esp, %ebp
; X86-MOVBE-NEXT: pushl %edi
; X86-MOVBE-NEXT: pushl %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-MOVBE-NEXT: andl $-16, %esp
+; X86-MOVBE-NEXT: movl 8(%ebp), %eax
+; X86-MOVBE-NEXT: movl 32(%ebp), %ecx
+; X86-MOVBE-NEXT: movl 36(%ebp), %edx
+; X86-MOVBE-NEXT: movl 24(%ebp), %esi
+; X86-MOVBE-NEXT: movl 28(%ebp), %edi
; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
; X86-MOVBE-NEXT: movbel %edx, (%eax)
+; X86-MOVBE-NEXT: leal -8(%ebp), %esp
; X86-MOVBE-NEXT: popl %esi
; X86-MOVBE-NEXT: popl %edi
+; X86-MOVBE-NEXT: popl %ebp
; X86-MOVBE-NEXT: retl $4
;
; X64-LABEL: bswap_i128:
diff --git a/llvm/test/CodeGen/X86/constructor.ll b/llvm/test/CodeGen/X86/constructor.ll
index f46325db8c19..dca62acff6ed 100644
--- a/llvm/test/CodeGen/X86/constructor.ll
+++ b/llvm/test/CodeGen/X86/constructor.ll
@@ -5,7 +5,6 @@
; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=INIT-ARRAY %s
; RUN: llc -mtriple x86_64-unknown-freebsd < %s | FileCheck --check-prefix=INIT-ARRAY %s
; RUN: llc -mtriple x86_64-pc-solaris2.11 < %s | FileCheck --check-prefix=INIT-ARRAY %s
-; RUN: llc -mtriple x86_64-unknown-nacl < %s | FileCheck --check-prefix=NACL %s
; RUN: llc -mtriple i586-intel-elfiamcu -use-ctors < %s | FileCheck %s --check-prefix=MCU-CTORS
; RUN: llc -mtriple i586-intel-elfiamcu < %s | FileCheck %s --check-prefix=MCU-INIT-ARRAY
; RUN: llc -mtriple x86_64-win32-gnu < %s | FileCheck --check-prefix=COFF-CTOR %s
@@ -62,18 +61,6 @@ entry:
; INIT-ARRAY-NEXT: .quad i
; INIT-ARRAY-NEXT: .quad j
-; NACL: .section .init_array.15,"awG",@init_array,v,comdat
-; NACL-NEXT: .p2align 2
-; NACL-NEXT: .long g
-; NACL-NEXT: .section .init_array.55555,"awG",@init_array,v,comdat
-; NACL-NEXT: .p2align 2
-; NACL-NEXT: .long h
-; NACL-NEXT: .section .init_array,"aw",@init_array
-; NACL-NEXT: .p2align 2
-; NACL-NEXT: .long f
-; NACL-NEXT: .long i
-; NACL-NEXT: .long j
-
; MCU-CTORS: .section .ctors,"aw",@progbits
; MCU-INIT-ARRAY: .section .init_array,"aw",@init_array
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index d869f8ec01a5..661e7bb19641 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -152,17 +152,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $176, %esp
-; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edx
; X86-NEXT: xorl %eax, %edx
-; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -172,16 +172,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %ecx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ebx
; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: subl %edx, %edi
; X86-NEXT: sbbl %edx, %ebx
@@ -488,13 +488,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 56(%ebp), %ecx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: movl %eax, 4(%ecx)
; X86-NEXT: movl %ebx, 8(%ecx)
; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %ecx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -508,7 +508,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -523,17 +523,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: imull %esi, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: imull %edx, %ebx
; X86-NEXT: mull %edx
@@ -543,13 +543,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: adcl %edi, %ebx
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, (%eax)
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 7bbddefd8272..370e1c608e44 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -152,26 +152,26 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $160, %esp
-; X86-NEXT: movl 28(%ebp), %ebx
-; X86-NEXT: movl 40(%ebp), %esi
-; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: orl 36(%ebp), %ecx
+; X86-NEXT: orl 48(%ebp), %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %cl
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: orl 24(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: orl 20(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: orl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: orl 32(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: sete %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: bsrl %esi, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl 36(%ebp), %ecx
+; X86-NEXT: bsrl 48(%ebp), %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: testl %esi, %esi
@@ -184,325 +184,310 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: addl $64, %eax
-; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 48(%ebp), %edx
; X86-NEXT: orl %esi, %edx
; X86-NEXT: cmovnel %ecx, %eax
-; X86-NEXT: movl 24(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
; X86-NEXT: bsrl %ebx, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %ecx
; X86-NEXT: bsrl %ecx, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edi
; X86-NEXT: bsrl %edi, %esi
; X86-NEXT: xorl $31, %esi
-; X86-NEXT: bsrl 12(%ebp), %edx
+; X86-NEXT: bsrl 24(%ebp), %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: addl $32, %edx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %esi, %edx
; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl 20(%ebp), %edi
-; X86-NEXT: movl %edi, %esi
+; X86-NEXT: movl 32(%ebp), %esi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movl $127, %ecx
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT: movl $127, %edx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl $127, %eax
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ebx, %eax
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: testb %cl, %cl
-; X86-NEXT: movb %cl, %ah
-; X86-NEXT: movl 24(%ebp), %ebx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: cmovnel %esi, %ebx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovnel %esi, %ecx
+; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%ebp), %esi
-; X86-NEXT: cmovnel %edx, %esi
-; X86-NEXT: movl 12(%ebp), %edi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: orb %ah, %al
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: jne .LBB4_7
-; X86-NEXT: # %bb.1: # %udiv-bb1
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: cmovnel %edi, %eax
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: cmovnel %edi, %edx
+; X86-NEXT: movl 24(%ebp), %ebx
+; X86-NEXT: cmovnel %edi, %ebx
+; X86-NEXT: movl 56(%ebp), %edi
+; X86-NEXT: jne .LBB4_8
+; X86-NEXT: # %bb.1: # %_udiv-special-cases
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: xorl $127, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 56(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: je .LBB4_8
+; X86-NEXT: # %bb.2: # %udiv-bb1
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorb $127, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 136(%esp,%eax), %edi
-; X86-NEXT: movl 140(%esp,%eax), %esi
-; X86-NEXT: shldl %cl, %edi, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 136(%esp,%eax), %esi
+; X86-NEXT: movl 140(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 128(%esp,%eax), %ebx
-; X86-NEXT: movl 132(%esp,%eax), %eax
-; X86-NEXT: shldl %cl, %eax, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: movl 132(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: shldl %cl, %ebx, %edx
; X86-NEXT: shll %cl, %ebx
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: addl $1, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $0, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $0, %ecx
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl 20(%ebp), %ebx
-; X86-NEXT: jae .LBB4_2
-; X86-NEXT: # %bb.5:
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: jmp .LBB4_6
-; X86-NEXT: .LBB4_2: # %udiv-preheader
+; X86-NEXT: jae .LBB4_3
+; X86-NEXT: # %bb.6:
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: jmp .LBB4_7
+; X86-NEXT: .LBB4_3: # %udiv-preheader
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%ebp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 92(%esp,%eax), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 88(%esp,%eax), %edx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %esi, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%esp,%eax), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 88(%esp,%eax), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 80(%esp,%eax), %edi
-; X86-NEXT: movl 84(%esp,%eax), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shrdl %cl, %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shrl %cl, %edx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %eax, %edi
+; X86-NEXT: shrdl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%esp,%eax), %edx
+; X86-NEXT: movl 84(%esp,%eax), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shrdl %cl, %edi, %ebx
+; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrdl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %eax
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%ebp), %esi
-; X86-NEXT: adcl $-1, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%ebp), %eax
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB4_3: # %udiv-do-while
+; X86-NEXT: .LBB4_4: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $1, %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shldl $1, %edi, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %edx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %edx
-; X86-NEXT: orl %eax, %edx
+; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: shldl $1, %ebx, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %ebx
+; X86-NEXT: shldl $1, %ecx, %edi
+; X86-NEXT: shldl $1, %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: shldl $1, %ecx, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, %ecx
+; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: addl %esi, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl 52(%ebp), %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 40(%ebp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 36(%ebp), %eax
+; X86-NEXT: andl 48(%ebp), %eax
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: andl 32(%ebp), %edx
-; X86-NEXT: andl 28(%ebp), %ecx
-; X86-NEXT: subl %ecx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: andl 44(%ebp), %edx
+; X86-NEXT: andl 40(%ebp), %ecx
+; X86-NEXT: subl %ecx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl $-1, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: adcl $-1, %ebx
-; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: adcl $-1, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: jne .LBB4_3
-; X86-NEXT: # %bb.4:
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jne .LBB4_4
+; X86-NEXT: # %bb.5:
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: .LBB4_6: # %udiv-loop-exit
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shldl $1, %edi, %ebx
-; X86-NEXT: orl %eax, %ebx
-; X86-NEXT: shldl $1, %esi, %edi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $1, %ecx, %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: .LBB4_7: # %udiv-end
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl 56(%ebp), %edi
+; X86-NEXT: .LBB4_7: # %udiv-loop-exit
+; X86-NEXT: shldl $1, %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: addl %ebx, %ebx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: .LBB4_8: # %udiv-end
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, (%edi)
+; X86-NEXT: movl %edx, 4(%edi)
+; X86-NEXT: movl %esi, 8(%edi)
+; X86-NEXT: movl %eax, 12(%edi)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 48(%ebp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull %edx, %esi
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl 40(%ebp), %edi
-; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: imull %ebx, %edi
; X86-NEXT: addl %edx, %edi
-; X86-NEXT: movl 28(%ebp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: imull 28(%ebp), %ebx
-; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: movl 32(%ebp), %edx
-; X86-NEXT: imull %edx, %esi
-; X86-NEXT: addl %ebx, %esi
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: imull 40(%ebp), %ecx
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl 40(%ebp), %ecx
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %esi, %ecx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull 32(%ebp)
-; X86-NEXT: movl 16(%ebp), %esi
+; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: setb %cl
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull 32(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: mull 44(%ebp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 12(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ebx, (%eax)
diff --git a/llvm/test/CodeGen/X86/dollar-name-asm.ll b/llvm/test/CodeGen/X86/dollar-name-asm.ll
new file mode 100644
index 000000000000..cc649f24e09e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dollar-name-asm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s --check-prefix=ATT
+; RUN: llc < %s -mtriple=x86_64 -output-asm-variant=1 | FileCheck %s --check-prefix=INTEL
+
+module asm "mov ($foo), %eax"
+
+; ATT: movl ($foo), %eax
+; INTEL: mov eax, dword ptr [$foo]
diff --git a/llvm/test/CodeGen/X86/dollar-name.ll b/llvm/test/CodeGen/X86/dollar-name.ll
index fc9d6a77f66e..b997b5107f01 100644
--- a/llvm/test/CodeGen/X86/dollar-name.ll
+++ b/llvm/test/CodeGen/X86/dollar-name.ll
@@ -1,18 +1,73 @@
-; RUN: llc < %s -mtriple=i386-linux | FileCheck %s
-; PR1339
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+;; See also dollar-name-asm.ll for module asm tests, which update_llc_test_checks.py doesn't support.
+; RUN: llc < %s -mtriple=i686 -relocation-model=static | FileCheck %s --check-prefix=STATIC
+; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic | FileCheck %s --check-prefix=PIC
+; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic -output-asm-variant=1 | FileCheck %s --check-prefix=INTEL-PIC
-@"$bar" = global i32 zeroinitializer
-@"$qux" = external dso_local global i32
+@"$arr" = global [2 x i32] zeroinitializer
+@"$arr_h" = hidden global [2 x i32] zeroinitializer
+@"$tls" = hidden thread_local global i32 0
define i32 @"$foo"() nounwind {
-; CHECK: movl $bar,
-; CHECK: addl $qux,
-; CHECK: calll $hen
- %m = load i32, ptr @"$bar"
- %n = load i32, ptr @"$qux"
- %t = add i32 %m, %n
- %u = call i32 @"$hen"(i32 %t)
+; STATIC-LABEL: $foo:
+; STATIC: # %bb.0:
+; STATIC-NEXT: movl ($arr), %eax
+; STATIC-NEXT: movl %gs:0, %ecx
+; STATIC-NEXT: addl ($arr+4), %eax
+; STATIC-NEXT: addl ($tls@NTPOFF)(%ecx), %eax
+; STATIC-NEXT: pushl ($arr_h)
+; STATIC-NEXT: pushl %eax
+; STATIC-NEXT: calll ($hen@PLT)
+; STATIC-NEXT: addl $8, %esp
+; STATIC-NEXT: retl
+;
+; PIC-LABEL: $foo:
+; PIC: # %bb.0:
+; PIC-NEXT: pushq %rbp
+; PIC-NEXT: pushq %r14
+; PIC-NEXT: pushq %rbx
+; PIC-NEXT: movq ($arr@GOTPCREL)(%rip), %r14
+; PIC-NEXT: movl (%r14), %ebx
+; PIC-NEXT: movl ($arr_h)(%rip), %ebp
+; PIC-NEXT: leaq ($tls@TLSLD)(%rip), %rdi
+; PIC-NEXT: callq __tls_get_addr@PLT
+; PIC-NEXT: addl 4(%r14), %ebx
+; PIC-NEXT: addl ($tls@DTPOFF)(%rax), %ebx
+; PIC-NEXT: movl %ebx, %edi
+; PIC-NEXT: movl %ebp, %esi
+; PIC-NEXT: callq ($hen@PLT)
+; PIC-NEXT: popq %rbx
+; PIC-NEXT: popq %r14
+; PIC-NEXT: popq %rbp
+; PIC-NEXT: retq
+;
+; INTEL-PIC-LABEL: $foo:
+; INTEL-PIC: # %bb.0:
+; INTEL-PIC-NEXT: push rbp
+; INTEL-PIC-NEXT: push r14
+; INTEL-PIC-NEXT: push rbx
+; INTEL-PIC-NEXT: mov r14, qword ptr [rip + $arr@GOTPCREL]
+; INTEL-PIC-NEXT: mov ebx, dword ptr [r14]
+; INTEL-PIC-NEXT: mov ebp, dword ptr [rip + $arr_h]
+; INTEL-PIC-NEXT: lea rdi, [rip + $tls@TLSLD]
+; INTEL-PIC-NEXT: call __tls_get_addr@PLT
+; INTEL-PIC-NEXT: add ebx, dword ptr [r14 + 4]
+; INTEL-PIC-NEXT: add ebx, dword ptr [rax + $tls@DTPOFF]
+; INTEL-PIC-NEXT: mov edi, ebx
+; INTEL-PIC-NEXT: mov esi, ebp
+; INTEL-PIC-NEXT: call $hen@PLT
+; INTEL-PIC-NEXT: pop rbx
+; INTEL-PIC-NEXT: pop r14
+; INTEL-PIC-NEXT: pop rbp
+; INTEL-PIC-NEXT: ret
+ %m = load i32, ptr @"$arr"
+ %m1 = load i32, ptr getelementptr inbounds nuw (i32, ptr @"$arr", i32 1) ; second i32 of $arr, i.e. the $arr+4 operand checked above
+ %m2 = load i32, ptr @"$arr_h"
+ %tls_v = load i32, ptr @"$tls"
+ %t0 = add i32 %m, %m1
+ %t1 = add i32 %t0, %tls_v
+ %u = call i32 @"$hen"(i32 %t1, i32 %m2)
ret i32 %u
}
-declare i32 @"$hen"(i32 %a)
+declare i32 @"$hen"(i32 %a, i32 %b)
diff --git a/llvm/test/CodeGen/X86/exp10-libcall.ll b/llvm/test/CodeGen/X86/exp10-libcall.ll
index a6959ace073f..4abf6b360ca3 100644
--- a/llvm/test/CodeGen/X86/exp10-libcall.ll
+++ b/llvm/test/CodeGen/X86/exp10-libcall.ll
@@ -2,14 +2,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
-define float @call_exp10f(float %a) {
+define float @call_exp10f(float %a) nounwind {
; CHECK-LABEL: call_exp10f:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq exp10f@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_exp10f:
@@ -24,14 +22,12 @@ define float @call_exp10f(float %a) {
ret float %result
}
-define double @call_exp10(double %a) {
+define double @call_exp10(double %a) nounwind {
; CHECK-LABEL: call_exp10:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq exp10@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_exp10:
@@ -46,16 +42,14 @@ define double @call_exp10(double %a) {
ret double %result
}
-define x86_fp80 @call_exp10l(x86_fp80 %a) {
+define x86_fp80 @call_exp10l(x86_fp80 %a) nounwind {
; CHECK-LABEL: call_exp10l:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq exp10l@PLT
; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_exp10l:
diff --git a/llvm/test/CodeGen/X86/fast-isel-x32.ll b/llvm/test/CodeGen/X86/fast-isel-x32.ll
index 23f6304c88d2..e01cebecdbb0 100644
--- a/llvm/test/CodeGen/X86/fast-isel-x32.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-x32.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel -fast-isel-abort=1 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort=1 | FileCheck %s
; Test that alloca addresses are materialized with the right size instruction.
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index 707b05f3478d..bb5640aeb66f 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -481,18 +481,21 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixtfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -501,7 +504,7 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -620,18 +623,21 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixunstfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -640,7 +646,7 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -818,18 +824,21 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floattitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -838,7 +847,7 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -1016,18 +1025,21 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp {
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floatuntitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1036,7 +1048,7 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp {
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 1de2484d47ba..6d4ec063ccd4 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -415,16 +415,20 @@ define dso_local void @TestFPToSIF128_I128() nounwind {
; X86-LABEL: TestFPToSIF128_I128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vf128, %eax
+; X86-NEXT: movl vf128+4, %ecx
+; X86-NEXT: movl vf128+8, %edx
+; X86-NEXT: movl vf128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixtfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -432,7 +436,7 @@ define dso_local void @TestFPToSIF128_I128() nounwind {
; X86-NEXT: movl %edx, vi128+8
; X86-NEXT: movl %ecx, vi128+4
; X86-NEXT: movl %eax, vi128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -466,16 +470,20 @@ define dso_local void @TestFPToUIF128_U128() nounwind {
; X86-LABEL: TestFPToUIF128_U128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vf128, %eax
+; X86-NEXT: movl vf128+4, %ecx
+; X86-NEXT: movl vf128+8, %edx
+; X86-NEXT: movl vf128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixunstfti
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -483,7 +491,7 @@ define dso_local void @TestFPToUIF128_U128() nounwind {
; X86-NEXT: movl %edx, vu128+8
; X86-NEXT: movl %ecx, vu128+4
; X86-NEXT: movl %eax, vu128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -913,16 +921,20 @@ define dso_local void @TestSIToFPI128_F128() nounwind {
; X86-LABEL: TestSIToFPI128_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vi128, %eax
+; X86-NEXT: movl vi128+4, %ecx
+; X86-NEXT: movl vi128+8, %edx
+; X86-NEXT: movl vi128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vi128+12
-; X86-NEXT: pushl vi128+8
-; X86-NEXT: pushl vi128+4
-; X86-NEXT: pushl vi128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floattitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -930,7 +942,7 @@ define dso_local void @TestSIToFPI128_F128() nounwind {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -964,16 +976,20 @@ define dso_local void @TestUIToFPU128_F128() #2 {
; X86-LABEL: TestUIToFPU128_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl vu128, %eax
+; X86-NEXT: movl vu128+4, %ecx
+; X86-NEXT: movl vu128+8, %edx
+; X86-NEXT: movl vu128+12, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl vu128+12
-; X86-NEXT: pushl vu128+8
-; X86-NEXT: pushl vu128+4
-; X86-NEXT: pushl vu128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __floatuntitf
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -981,7 +997,7 @@ define dso_local void @TestUIToFPU128_F128() #2 {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -1134,33 +1150,30 @@ define dso_local i32 @TestBits128(fp128 %ld) nounwind {
;
; X86-LABEL: TestBits128:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $72, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: orl (%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sete %al
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $72, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-AVX-LABEL: TestBits128:
@@ -1359,12 +1372,14 @@ define i1 @PR34866(i128 %x) nounwind {
;
; X86-LABEL: PR34866:
; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-AVX-LABEL: PR34866:
@@ -1394,12 +1409,14 @@ define i1 @PR34866_commute(i128 %x) nounwind {
;
; X86-LABEL: PR34866_commute:
; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-AVX-LABEL: PR34866_commute:
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index a7eea04181f6..ad2d690fd7ed 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -41,27 +41,40 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: add:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: add:
@@ -81,24 +94,32 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -107,9 +128,10 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -141,27 +163,40 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: sub:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: sub:
@@ -181,24 +216,32 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -207,9 +250,10 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -241,27 +285,40 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: mul:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: mul:
@@ -281,24 +338,32 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -307,9 +372,10 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -341,27 +407,40 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: div:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: div:
@@ -381,24 +460,32 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -407,9 +494,10 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -434,31 +522,48 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
;
; X86-LABEL: fma:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $92, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaf128
-; X86-NEXT: addl $60, %esp
-; X86-NEXT: movaps (%esp), %xmm0
-; X86-NEXT: movaps %xmm0, (%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%ebp)
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl $92, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: fma:
@@ -481,28 +586,40 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $96, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 56(%ebp)
-; WIN-X86-NEXT: pushl 52(%ebp)
-; WIN-X86-NEXT: pushl 48(%ebp)
-; WIN-X86-NEXT: pushl 44(%ebp)
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 52(%ebp), %ebx
+; WIN-X86-NEXT: movl 56(%ebp), %edi
+; WIN-X86-NEXT: movl 60(%ebp), %edx
+; WIN-X86-NEXT: movl 64(%ebp), %ecx
+; WIN-X86-NEXT: movl 68(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 48(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 44(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 40(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmal
-; WIN-X86-NEXT: addl $52, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -511,9 +628,10 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -538,27 +656,40 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: frem:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: frem:
@@ -578,24 +709,32 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -604,9 +743,10 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -631,23 +771,28 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
;
; X86-LABEL: ceil:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll ceilf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: ceil:
@@ -667,17 +812,20 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _ceill
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -713,23 +861,28 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
;
; X86-LABEL: acos:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll acosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: acos:
@@ -749,17 +902,20 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _acosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -795,23 +951,28 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
;
; X86-LABEL: cos:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll cosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: cos:
@@ -831,17 +992,20 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _cosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -877,23 +1041,28 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: cosh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll coshf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: cosh:
@@ -913,17 +1082,20 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _coshl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -959,23 +1131,28 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll expf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: exp:
@@ -995,17 +1172,20 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _expl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1041,23 +1221,28 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll exp2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: exp2:
@@ -1077,17 +1262,20 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _exp2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1123,23 +1311,28 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
;
; X86-LABEL: floor:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll floorf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: floor:
@@ -1159,17 +1352,20 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _floorl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1205,23 +1401,28 @@ define fp128 @log(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll logf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log:
@@ -1241,17 +1442,20 @@ define fp128 @log(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _logl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1287,23 +1491,28 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log10:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll log10f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log10:
@@ -1323,17 +1532,20 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _log10l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1369,23 +1581,28 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll log2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: log2:
@@ -1405,17 +1622,20 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _log2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1451,27 +1671,40 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: maxnum:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaxf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: maxnum:
@@ -1491,24 +1724,32 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmaxl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1517,9 +1758,10 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1544,27 +1786,40 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: minnum:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fminf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: minnum:
@@ -1584,24 +1839,32 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fminl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1610,9 +1873,10 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1637,23 +1901,28 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: nearbyint:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll nearbyintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: nearbyint:
@@ -1673,17 +1942,20 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _nearbyintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1719,27 +1991,40 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: pow:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll powf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: pow:
@@ -1759,24 +2044,32 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _powl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1785,9 +2078,10 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1819,24 +2113,32 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
;
; X86-LABEL: powi:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $64, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __powitf2
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $64, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
; WIN-LABEL: powi:
@@ -1853,21 +2155,26 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___powitf2
-; WIN-X86-NEXT: addl $24, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1876,9 +2183,10 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1903,23 +2211,28 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: rint:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll rintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: rint:
@@ -1939,17 +2252,20 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _rintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1985,23 +2301,28 @@ define fp128 @round(fp128 %x) nounwind strictfp {
;
; X86-LABEL: round:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: round:
@@ -2021,17 +2342,20 @@ define fp128 @round(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2067,23 +2391,28 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
;
; X86-LABEL: roundeven:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundevenf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: roundeven:
@@ -2103,17 +2432,20 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundevenl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2149,23 +2481,28 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
;
; X86-LABEL: asin:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll asinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: asin:
@@ -2185,17 +2522,20 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _asinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2231,23 +2571,28 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sin:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sin:
@@ -2267,17 +2612,20 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2313,23 +2661,28 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sinh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sinh:
@@ -2349,17 +2702,20 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2395,23 +2751,28 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sqrt:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sqrtf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: sqrt:
@@ -2431,17 +2792,20 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sqrtl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2477,23 +2841,28 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
;
; X86-LABEL: atan:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: atan:
@@ -2513,17 +2882,20 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2559,27 +2931,40 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: atan2:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atan2f128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: atan2:
@@ -2599,24 +2984,32 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atan2l
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2625,9 +3018,10 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -2652,23 +3046,28 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
;
; X86-LABEL: tan:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: tan:
@@ -2688,17 +3087,20 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2734,23 +3136,28 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
;
; X86-LABEL: tanh:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: tanh:
@@ -2770,17 +3177,20 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2816,23 +3226,28 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
;
; X86-LABEL: trunc:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll truncf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: trunc:
@@ -2852,17 +3267,20 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _truncl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2919,12 +3337,18 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: lrint:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _lrintl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -2969,12 +3393,18 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: llrint:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _llrintl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -3019,12 +3449,18 @@ define i32 @lround(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: lround:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _lroundl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -3069,12 +3505,18 @@ define i64 @llround(fp128 %x) nounwind strictfp {
;
; WIN-X86-LABEL: llround:
; WIN-X86: # %bb.0: # %entry
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 20(%ebp)
+; WIN-X86-NEXT: pushl 16(%ebp)
+; WIN-X86-NEXT: pushl 12(%ebp)
+; WIN-X86-NEXT: pushl 8(%ebp)
; WIN-X86-NEXT: calll _llroundl
; WIN-X86-NEXT: addl $16, %esp
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
%round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -3176,26 +3618,32 @@ define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
;
; WIN-X86-LABEL: cmp:
; WIN-X86: # %bb.0:
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
+; WIN-X86-NEXT: pushl 36(%ebp)
+; WIN-X86-NEXT: pushl 32(%ebp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: je LBB37_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB37_3
; WIN-X86-NEXT: LBB37_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB37_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmp.f128(
fp128 %x, fp128 %y,
@@ -3300,26 +3748,32 @@ define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
;
; WIN-X86-LABEL: cmps:
; WIN-X86: # %bb.0:
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
+; WIN-X86-NEXT: pushl 36(%ebp)
+; WIN-X86-NEXT: pushl 32(%ebp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: je LBB38_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB38_3
; WIN-X86-NEXT: LBB38_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB38_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: movl %ebp, %esp
+; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmps.f128(
fp128 %x, fp128 %y,
@@ -3496,44 +3950,47 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-LABEL: cmp_ueq_q:
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: movl 32(%ebp), %edi
+; WIN-X86-NEXT: movl 36(%ebp), %esi
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: sete %bl
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: orb %bl, %al
; WIN-X86-NEXT: jne LBB39_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB39_3
; WIN-X86-NEXT: LBB39_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB39_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
; WIN-X86-NEXT: popl %ebx
@@ -3716,32 +4173,34 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-LABEL: cmp_one_q:
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
+; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: andl $-16, %esp
+; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: movl 32(%ebp), %edi
+; WIN-X86-NEXT: movl 36(%ebp), %esi
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___eqtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: setne %bl
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 52(%ebp)
+; WIN-X86-NEXT: pushl 48(%ebp)
+; WIN-X86-NEXT: pushl 44(%ebp)
+; WIN-X86-NEXT: pushl 40(%ebp)
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: pushl %edi
-; WIN-X86-NEXT: pushl %ebp
-; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: pushl 28(%ebp)
+; WIN-X86-NEXT: pushl 24(%ebp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
@@ -3749,13 +4208,14 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-NEXT: testb %bl, %al
; WIN-X86-NEXT: jne LBB40_1
; WIN-X86-NEXT: # %bb.2:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 16(%ebp), %ecx
; WIN-X86-NEXT: jmp LBB40_3
; WIN-X86-NEXT: LBB40_1:
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN-X86-NEXT: leal 8(%ebp), %ecx
; WIN-X86-NEXT: LBB40_3:
; WIN-X86-NEXT: movl (%ecx), %eax
; WIN-X86-NEXT: movl 4(%ecx), %edx
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
; WIN-X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index f727a7907862..4b0449fd7502 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -42,22 +42,38 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Add:
@@ -78,22 +94,31 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -101,8 +126,10 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -144,22 +171,38 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __addtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Add:
@@ -180,22 +223,31 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___addtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -203,8 +255,10 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -241,22 +295,38 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sub:
@@ -277,22 +347,31 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -300,8 +379,10 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -343,22 +424,38 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __subtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Sub:
@@ -379,22 +476,31 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___subtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -402,8 +508,10 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -440,22 +548,38 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Mul:
@@ -476,22 +600,31 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -499,8 +632,10 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -542,22 +677,38 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __multf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Mul:
@@ -578,22 +729,31 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___multf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -601,8 +761,10 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -639,22 +801,38 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Div:
@@ -675,22 +853,31 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -698,8 +885,10 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -741,22 +930,38 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __divtf3
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Div:
@@ -777,22 +982,31 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll ___divtf3
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -800,8 +1014,10 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -830,22 +1046,38 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128Rem:
@@ -866,22 +1098,31 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %edi
+; WIN-X86-NEXT: movl 28(%ebp), %ebx
+; WIN-X86-NEXT: movl 32(%ebp), %ecx
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -889,8 +1130,10 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+8
; WIN-X86-NEXT: movl %ecx, _vf128+4
; WIN-X86-NEXT: movl %eax, _vf128
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -922,22 +1165,38 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $76, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl vf128, %edi
+; X86-NEXT: movl vf128+4, %ebx
+; X86-NEXT: movl vf128+8, %ebp
+; X86-NEXT: movl vf128+12, %eax
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl vf128+12
-; X86-NEXT: pushl vf128+8
-; X86-NEXT: pushl vf128+4
-; X86-NEXT: pushl vf128
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmodf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $76, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; WIN-LABEL: Test128_1Rem:
@@ -958,22 +1217,31 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
+; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl _vf128+12
-; WIN-X86-NEXT: pushl _vf128+8
-; WIN-X86-NEXT: pushl _vf128+4
-; WIN-X86-NEXT: pushl _vf128
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $80, %esp
+; WIN-X86-NEXT: movl 16(%ebp), %esi
+; WIN-X86-NEXT: movl 20(%ebp), %edi
+; WIN-X86-NEXT: movl _vf128, %edx
+; WIN-X86-NEXT: movl _vf128+4, %ebx
+; WIN-X86-NEXT: movl _vf128+8, %ecx
+; WIN-X86-NEXT: movl _vf128+12, %eax
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 12(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmodl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -981,8 +1249,10 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %edx, _vf128+12
; WIN-X86-NEXT: movl %eax, _vf128
; WIN-X86-NEXT: movl %ecx, _vf128+4
-; WIN-X86-NEXT: leal -4(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
+; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1011,18 +1281,24 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sqrt:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sqrtf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sqrt:
@@ -1042,16 +1318,19 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sqrtl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1089,18 +1368,24 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sin:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Sin:
@@ -1120,16 +1405,19 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1167,18 +1455,24 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Cos:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll cosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Cos:
@@ -1198,16 +1492,19 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _cosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1245,18 +1542,24 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Ceil:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll ceilf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Ceil:
@@ -1276,16 +1579,19 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _ceill
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1323,18 +1629,24 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Floor:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll floorf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Floor:
@@ -1354,16 +1666,19 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _floorl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1401,18 +1716,24 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Trunc:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll truncf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Trunc:
@@ -1432,16 +1753,19 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _truncl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1479,18 +1803,24 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Nearbyint:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll nearbyintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Nearbyint:
@@ -1510,16 +1840,19 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _nearbyintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1557,18 +1890,24 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Rint:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll rintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Rint:
@@ -1588,16 +1927,19 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _rintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1635,18 +1977,24 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Round:
; X86: # %bb.0: # %entry
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll roundf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, vf128
-; X86-NEXT: addl $28, %esp
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; WIN-LABEL: Test128Round:
@@ -1666,16 +2014,19 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
; WIN-X86-NEXT: movl %esp, %ebp
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $32, %esp
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl 8(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: movl 8(%ebp), %eax
+; WIN-X86-NEXT: movl 12(%ebp), %ecx
+; WIN-X86-NEXT: movl 16(%ebp), %edx
+; WIN-X86-NEXT: movl 20(%ebp), %esi
+; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _roundl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1705,31 +2056,48 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
;
; X86-LABEL: Test128FMA:
; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $92, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll fmaf128
-; X86-NEXT: addl $60, %esp
-; X86-NEXT: movaps (%esp), %xmm0
-; X86-NEXT: movaps %xmm0, (%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%ebp)
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl $92, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128FMA:
@@ -1752,28 +2120,40 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
; WIN-X86: # %bb.0: # %entry
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $96, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 56(%ebp)
-; WIN-X86-NEXT: pushl 52(%ebp)
-; WIN-X86-NEXT: pushl 48(%ebp)
-; WIN-X86-NEXT: pushl 44(%ebp)
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 52(%ebp), %ebx
+; WIN-X86-NEXT: movl 56(%ebp), %edi
+; WIN-X86-NEXT: movl 60(%ebp), %edx
+; WIN-X86-NEXT: movl 64(%ebp), %ecx
+; WIN-X86-NEXT: movl 68(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 48(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 44(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 40(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 36(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _fmal
-; WIN-X86-NEXT: addl $52, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1782,9 +2162,10 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
entry:
@@ -1804,23 +2185,28 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
;
; X86-LABEL: Test128Acos:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll acosf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Acos:
@@ -1840,17 +2226,20 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _acosl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1879,23 +2268,28 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
;
; X86-LABEL: Test128Asin:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll asinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Asin:
@@ -1915,17 +2309,20 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _asinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1954,23 +2351,28 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
;
; X86-LABEL: Test128Atan:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Atan:
@@ -1990,17 +2392,20 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2029,27 +2434,40 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
;
; X86-LABEL: Test128Atan2:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll atan2f128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Atan2:
@@ -2069,24 +2487,32 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
; WIN-X86: # %bb.0:
; WIN-X86-NEXT: pushl %ebp
; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _atan2l
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2095,9 +2521,10 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
; WIN-X86-NEXT: movl %ecx, 4(%esi)
; WIN-X86-NEXT: movl %eax, (%esi)
; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
; WIN-X86-NEXT: popl %esi
; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
; WIN-X86-NEXT: popl %ebp
; WIN-X86-NEXT: retl
%x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b)
@@ -2115,23 +2542,28 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Cosh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll coshf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Cosh:
@@ -2151,17 +2583,20 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _coshl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2190,23 +2625,28 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Sinh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll sinhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Sinh:
@@ -2226,17 +2666,20 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _sinhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2265,23 +2708,28 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
;
; X86-LABEL: Test128Tan:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Tan:
@@ -2301,17 +2749,20 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2340,23 +2791,28 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
;
; X86-LABEL: Test128Tanh:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll tanhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Tanh:
@@ -2376,17 +2832,20 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _tanhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2425,27 +2884,34 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
;
; X86-LABEL: Test128Modf:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $40, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll modff128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm1
; X86-NEXT: movaps %xmm1, 16(%esi)
; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $40, %esp
+; X86-NEXT: addl $80, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
; WIN-LABEL: Test128Modf:
@@ -2468,18 +2934,21 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
; WIN-X86-NEXT: pushl %edi
; WIN-X86-NEXT: pushl %esi
; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $64, %esp
+; WIN-X86-NEXT: subl $112, %esp
; WIN-X86-NEXT: movl 8(%ebp), %esi
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; WIN-X86-NEXT: pushl %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %ecx
+; WIN-X86-NEXT: movl %eax, (%esp)
; WIN-X86-NEXT: calll _modfl
-; WIN-X86-NEXT: addl $24, %esp
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN-X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/test/CodeGen/X86/frameaddr.ll b/llvm/test/CodeGen/X86/frameaddr.ll
index 7f2f9b8bd6fd..33c9e3855c10 100644
--- a/llvm/test/CodeGen/X86/frameaddr.ll
+++ b/llvm/test/CodeGen/X86/frameaddr.ll
@@ -5,8 +5,6 @@
; RUN: llc < %s -mtriple=x86_64-unknown -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-64
; RUN: llc < %s -mtriple=x86_64-gnux32 | FileCheck %s --check-prefix=CHECK-X32ABI
; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-X32ABI
-; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s --check-prefix=CHECK-NACL64
-; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-NACL64
define ptr @test1() nounwind {
entry:
@@ -34,10 +32,6 @@ entry:
; CHECK-X32ABI-NEXT: movl %ebp, %eax
; CHECK-X32ABI-NEXT: popq %rbp
; CHECK-X32ABI-NEXT: ret
-; CHECK-NACL64-LABEL: test1
-; CHECK-NACL64: pushq %rbp
-; CHECK-NACL64-NEXT: movq %rsp, %rbp
-; CHECK-NACL64-NEXT: movl %ebp, %eax
%0 = tail call ptr @llvm.frameaddress(i32 0)
ret ptr %0
}
@@ -71,11 +65,6 @@ entry:
; CHECK-X32ABI-NEXT: movl (%eax), %eax
; CHECK-X32ABI-NEXT: popq %rbp
; CHECK-X32ABI-NEXT: ret
-; CHECK-NACL64-LABEL: test2
-; CHECK-NACL64: pushq %rbp
-; CHECK-NACL64-NEXT: movq %rsp, %rbp
-; CHECK-NACL64-NEXT: movl (%ebp), %eax
-; CHECK-NACL64-NEXT: movl (%eax), %eax
%0 = tail call ptr @llvm.frameaddress(i32 2)
ret ptr %0
}
diff --git a/llvm/test/CodeGen/X86/freeze-unary.ll b/llvm/test/CodeGen/X86/freeze-unary.ll
index 8602c385af83..bc9e29957c74 100644
--- a/llvm/test/CodeGen/X86/freeze-unary.ll
+++ b/llvm/test/CodeGen/X86/freeze-unary.ll
@@ -70,6 +70,86 @@ define <2 x i64> @freeze_zext_vec(<2 x i16> %a0) nounwind {
ret <2 x i64> %z
}
+define i32 @freeze_abs(i32 %a0) nounwind {
+; X86-LABEL: freeze_abs:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_abs:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %a0, i1 0)
+ %f = freeze i32 %x
+ %r = call i32 @llvm.abs.i32(i32 %f, i1 0)
+ ret i32 %r
+}
+
+define <4 x i32> @freeze_abs_vec(<4 x i32> %a0) nounwind {
+; X86-LABEL: freeze_abs_vec:
+; X86: # %bb.0:
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrad $31, %xmm1
+; X86-NEXT: pxor %xmm1, %xmm0
+; X86-NEXT: psubd %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_abs_vec:
+; X64: # %bb.0:
+; X64-NEXT: pabsd %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a0, i1 0)
+ %f = freeze <4 x i32> %x
+ %r = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %f, i1 0)
+ ret <4 x i32> %r
+}
+
+define i32 @freeze_abs_undef(i32 %a0) nounwind {
+; X86-LABEL: freeze_abs_undef:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_abs_undef:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %a0, i1 -1)
+ %f = freeze i32 %x
+ %r = call i32 @llvm.abs.i32(i32 %f, i1 -1)
+ ret i32 %r
+}
+
+define <4 x i32> @freeze_abs_undef_vec(<4 x i32> %a0) nounwind {
+; X86-LABEL: freeze_abs_undef_vec:
+; X86: # %bb.0:
+; X86-NEXT: movdqa %xmm0, %xmm1
+; X86-NEXT: psrad $31, %xmm1
+; X86-NEXT: pxor %xmm1, %xmm0
+; X86-NEXT: psubd %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_abs_undef_vec:
+; X64: # %bb.0:
+; X64-NEXT: pabsd %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a0, i1 -1)
+ %f = freeze <4 x i32> %x
+ %r = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %f, i1 -1)
+ ret <4 x i32> %r
+}
+
define i32 @freeze_bswap(i32 %a0) nounwind {
; X86-LABEL: freeze_bswap:
; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index e8c8ccfa8d37..ec1b8a3c8d6d 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -264,53 +264,62 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: movl %esp, %ebp
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $-16, %esp
+; X86-FAST-NEXT: subl $16, %esp
+; X86-FAST-NEXT: movl 24(%ebp), %edi
+; X86-FAST-NEXT: movl 28(%ebp), %edx
+; X86-FAST-NEXT: movl 48(%ebp), %esi
+; X86-FAST-NEXT: movl 56(%ebp), %ecx
; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: movl 52(%ebp), %eax
; X86-FAST-NEXT: jne .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %ebx, %ebp
; X86-FAST-NEXT: movl %esi, %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl %edi, %eax
-; X86-FAST-NEXT: movl %edx, %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl %edi, %esi
+; X86-FAST-NEXT: movl 32(%ebp), %edi
+; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: movl 36(%ebp), %edx
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %esi, %edx
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: movl %ebx, %edi
-; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %eax
+; X86-FAST-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-FAST-NEXT: jmp .LBB6_6
; X86-FAST-NEXT: .LBB6_1:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl 44(%ebp), %ebx
+; X86-FAST-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl 40(%ebp), %ebx
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB6_4
; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: movl %eax, %ebp
+; X86-FAST-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-FAST-NEXT: .LBB6_6:
-; X86-FAST-NEXT: movl %ebx, %eax
-; X86-FAST-NEXT: shldl %cl, %ebp, %eax
-; X86-FAST-NEXT: movl %edi, %ebp
-; X86-FAST-NEXT: shldl %cl, %ebx, %ebp
-; X86-FAST-NEXT: movl %esi, %ebx
-; X86-FAST-NEXT: shldl %cl, %edi, %ebx
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: shldl %cl, %ebx, %edi
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: shldl %cl, %esi, %ebx
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-FAST-NEXT: movl %eax, %esi
+; X86-FAST-NEXT: shldl %cl, %edx, %esi
; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-FAST-NEXT: shldl %cl, %esi, %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl %edx, 12(%ecx)
-; X86-FAST-NEXT: movl %ebx, 8(%ecx)
-; X86-FAST-NEXT: movl %ebp, 4(%ecx)
-; X86-FAST-NEXT: movl %eax, (%ecx)
-; X86-FAST-NEXT: movl %ecx, %eax
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-FAST-NEXT: shldl %cl, %eax, %edx
+; X86-FAST-NEXT: movl 8(%ebp), %eax
+; X86-FAST-NEXT: movl %edx, 12(%eax)
+; X86-FAST-NEXT: movl %esi, 8(%eax)
+; X86-FAST-NEXT: movl %ebx, 4(%eax)
+; X86-FAST-NEXT: movl %edi, (%eax)
+; X86-FAST-NEXT: leal -12(%ebp), %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -320,77 +329,91 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: movl %esp, %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testb $64, %al
+; X86-SLOW-NEXT: andl $-16, %esp
+; X86-SLOW-NEXT: subl $32, %esp
+; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 28(%ebp), %eax
+; X86-SLOW-NEXT: movl 48(%ebp), %edx
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: movl 52(%ebp), %edi
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %edx, %ebp
-; X86-SLOW-NEXT: movl %ebx, %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %esi
; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: testb $32, %al
-; X86-SLOW-NEXT: je .LBB6_5
-; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edi, %ebx
-; X86-SLOW-NEXT: movl %edx, %edi
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl 36(%ebp), %eax
+; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: .LBB6_3:
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: jne .LBB6_4
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %ecx, %ebp
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: movl %edx, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: shrl %ebp
-; X86-SLOW-NEXT: movb %al, %ch
-; X86-SLOW-NEXT: notb %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: orl %esi, %ebp
-; X86-SLOW-NEXT: movl %edi, %esi
-; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %esi, %edx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movb %al, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: movl %ebx, %edi
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: notb %bl
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %edx
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: orl %eax, %edx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-SLOW-NEXT: shrl %cl, %ebx
; X86-SLOW-NEXT: orl %eax, %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %esi, 8(%eax)
; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %ebp, (%eax)
-; X86-SLOW-NEXT: addl $4, %esp
+; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 4340f8fd484a..544ab7fc7737 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -258,51 +258,53 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: pushl %ebp
+; X86-FAST-NEXT: movl %esp, %ebp
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: pushl %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $-16, %esp
+; X86-FAST-NEXT: subl $16, %esp
+; X86-FAST-NEXT: movl 24(%ebp), %esi
+; X86-FAST-NEXT: movl 28(%ebp), %eax
+; X86-FAST-NEXT: movl 48(%ebp), %edx
+; X86-FAST-NEXT: movl 56(%ebp), %ecx
; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: movl 52(%ebp), %ebx
; X86-FAST-NEXT: je .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %esi, %ebp
-; X86-FAST-NEXT: movl %ebx, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: movl 32(%ebp), %esi
+; X86-FAST-NEXT: movl %ebx, %edi
+; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: movl 36(%ebp), %eax
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_4
; X86-FAST-NEXT: jmp .LBB6_5
; X86-FAST-NEXT: .LBB6_1:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl 40(%ebp), %edi
+; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: movl 44(%ebp), %edi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %edi, %ebx
-; X86-FAST-NEXT: movl %esi, %edi
-; X86-FAST-NEXT: movl %edx, %esi
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X86-FAST-NEXT: movl %esi, %eax
+; X86-FAST-NEXT: movl %ebx, %esi
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: movl %edi, %edx
+; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: shrdl %cl, %edx, %ebp
-; X86-FAST-NEXT: shrdl %cl, %esi, %edx
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, %edi
+; X86-FAST-NEXT: shrdl %cl, %ebx, %edx
+; X86-FAST-NEXT: shrdl %cl, %esi, %ebx
; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-FAST-NEXT: shrdl %cl, %ebx, %edi
-; X86-FAST-NEXT: movl %edi, 12(%eax)
-; X86-FAST-NEXT: movl %esi, 8(%eax)
+; X86-FAST-NEXT: shrdl %cl, %eax, %esi
+; X86-FAST-NEXT: movl 8(%ebp), %eax
+; X86-FAST-NEXT: movl %esi, 12(%eax)
+; X86-FAST-NEXT: movl %ebx, 8(%eax)
; X86-FAST-NEXT: movl %edx, 4(%eax)
-; X86-FAST-NEXT: movl %ebp, (%eax)
-; X86-FAST-NEXT: addl $4, %esp
+; X86-FAST-NEXT: movl %edi, (%eax)
+; X86-FAST-NEXT: leal -12(%ebp), %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -312,78 +314,88 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %ebp
+; X86-SLOW-NEXT: movl %esp, %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: andl $-16, %esp
+; X86-SLOW-NEXT: subl $16, %esp
+; X86-SLOW-NEXT: movl 24(%ebp), %edx
+; X86-SLOW-NEXT: movl 28(%ebp), %esi
+; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 56(%ebp), %eax
+; X86-SLOW-NEXT: testb $64, %al
+; X86-SLOW-NEXT: movl 52(%ebp), %edi
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movl %ebx, %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl 32(%ebp), %edx
+; X86-SLOW-NEXT: movl %edi, %eax
; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: testb $32, %cl
-; X86-SLOW-NEXT: jne .LBB6_5
-; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, %edi
-; X86-SLOW-NEXT: movl %edx, %ebp
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: movl 36(%ebp), %esi
+; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl 40(%ebp), %eax
+; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: .LBB6_3:
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: je .LBB6_4
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: leal (%ebp,%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: orl %edx, %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: notb %al
+; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: addl %ebx, %ebx
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %ebx
+; X86-SLOW-NEXT: orl %edx, %ebx
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: leal (%edi,%edi), %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
+; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl (%esp), %edi # 4-byte Reload
-; X86-SLOW-NEXT: leal (%edi,%edi), %ebp
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: shll %cl, %ebx
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl 8(%ebp), %ecx
; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebp, 8(%ecx)
+; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
; X86-SLOW-NEXT: movl %edx, 4(%ecx)
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, (%ecx)
; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: addl $8, %esp
+; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index a464d78f9af3..df97f49440f7 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -74,43 +74,57 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 48(%ebp), %edi
+; X86-SSE2-NEXT: movl 52(%ebp), %eax
+; X86-SSE2-NEXT: movl 24(%ebp), %edx
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: testb $64, %cl
-; X86-SSE2-NEXT: movl %esi, %eax
-; X86-SSE2-NEXT: cmovnel %ebx, %eax
-; X86-SSE2-NEXT: movl %edx, %ebp
-; X86-SSE2-NEXT: cmovnel %edi, %ebp
-; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi
-; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx
-; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: movl %edx, %ecx
+; X86-SSE2-NEXT: cmovnel %edi, %ecx
+; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl 28(%ebp), %esi
+; X86-SSE2-NEXT: movl %esi, %ebx
+; X86-SSE2-NEXT: cmovnel %eax, %ebx
+; X86-SSE2-NEXT: cmovnel 44(%ebp), %eax
+; X86-SSE2-NEXT: cmovnel 40(%ebp), %edi
+; X86-SSE2-NEXT: cmovel 36(%ebp), %esi
+; X86-SSE2-NEXT: cmovel 32(%ebp), %edx
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: testb $32, %cl
-; X86-SSE2-NEXT: cmovnel %esi, %edx
-; X86-SSE2-NEXT: cmovnel %ebp, %esi
-; X86-SSE2-NEXT: cmovnel %eax, %ebp
-; X86-SSE2-NEXT: cmovel %edi, %ebx
+; X86-SSE2-NEXT: cmovnel %edx, %esi
+; X86-SSE2-NEXT: cmovnel %ebx, %edx
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: cmovnel %ecx, %ebx
; X86-SSE2-NEXT: cmovel %eax, %edi
-; X86-SSE2-NEXT: movl %edi, %eax
-; X86-SSE2-NEXT: shldl %cl, %ebx, %eax
-; X86-SSE2-NEXT: movl %ebp, %ebx
-; X86-SSE2-NEXT: shldl %cl, %edi, %ebx
-; X86-SSE2-NEXT: movl %esi, %edi
-; X86-SSE2-NEXT: shldl %cl, %ebp, %edi
+; X86-SSE2-NEXT: cmovel %ecx, %eax
+; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SSE2-NEXT: shldl %cl, %esi, %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movl %edx, 12(%ecx)
-; X86-SSE2-NEXT: movl %edi, 8(%ecx)
-; X86-SSE2-NEXT: movl %ebx, 4(%ecx)
-; X86-SSE2-NEXT: movl %eax, (%ecx)
-; X86-SSE2-NEXT: movl %ecx, %eax
+; X86-SSE2-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SSE2-NEXT: movl %ebx, %edi
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
+; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SSE2-NEXT: shldl %cl, %eax, %edi
+; X86-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT: movl %edx, %edi
+; X86-SSE2-NEXT: movl 56(%ebp), %ecx
+; X86-SSE2-NEXT: shldl %cl, %ebx, %edi
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
+; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SSE2-NEXT: shldl %cl, %edx, %esi
+; X86-SSE2-NEXT: movl %esi, 12(%eax)
+; X86-SSE2-NEXT: movl %edi, 8(%eax)
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: movl %ecx, 4(%eax)
+; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SSE2-NEXT: movl %ecx, (%eax)
+; X86-SSE2-NEXT: leal -12(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll
index 2849e448a053..b4546c1e983c 100644
--- a/llvm/test/CodeGen/X86/i128-add.ll
+++ b/llvm/test/CodeGen/X86/i128-add.ll
@@ -5,17 +5,20 @@
define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-LABEL: add_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: addl 40(%ebp), %esi
+; X86-NEXT: adcl 44(%ebp), %edi
+; X86-NEXT: adcl 48(%ebp), %ecx
+; X86-NEXT: adcl 52(%ebp), %edx
; X86-NEXT: addl $1, %esi
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ecx
@@ -24,8 +27,10 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: add_i128:
diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index be8f7923b8f9..2174d5056e6c 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
; Combined ABI tests for fp128 and i128
@@ -15,6 +15,167 @@
; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32
; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32
+define void @store(PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store:
+; CHECK-X64-F128: # %bb.0:
+; CHECK-X64-F128-NEXT: movaps %xmm0, (%rdi)
+; CHECK-X64-F128-NEXT: retq
+;
+; CHECK-X64-I128-LABEL: store:
+; CHECK-X64-I128: # %bb.0:
+; CHECK-X64-I128-NEXT: movq %rsi, 8(%rdx)
+; CHECK-X64-I128-NEXT: movq %rdi, (%rdx)
+; CHECK-X64-I128-NEXT: retq
+;
+; CHECK-MSVC64-F128-LABEL: store:
+; CHECK-MSVC64-F128: # %bb.0:
+; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rdx)
+; CHECK-MSVC64-F128-NEXT: retq
+;
+; CHECK-MSVC64-I128-LABEL: store:
+; CHECK-MSVC64-I128: # %bb.0:
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 8(%r8)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, (%r8)
+; CHECK-MSVC64-I128-NEXT: retq
+;
+; CHECK-MINGW-F128-LABEL: store:
+; CHECK-MINGW-F128: # %bb.0:
+; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rdx)
+; CHECK-MINGW-F128-NEXT: retq
+;
+; CHECK-MINGW-I128-LABEL: store:
+; CHECK-MINGW-I128: # %bb.0:
+; CHECK-MINGW-I128-NEXT: movq %rdx, 8(%r8)
+; CHECK-MINGW-I128-NEXT: movq %rcx, (%r8)
+; CHECK-MINGW-I128-NEXT: retq
+;
+; CHECK-X86-LABEL: store:
+; CHECK-X86: # %bb.0:
+; CHECK-X86-NEXT: pushl %edi
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 20(%esp), %ecx
+; CHECK-X86-NEXT: movl 24(%esp), %edx
+; CHECK-X86-NEXT: movl 28(%esp), %esi
+; CHECK-X86-NEXT: movl 32(%esp), %edi
+; CHECK-X86-NEXT: movl %esi, 12(%edi)
+; CHECK-X86-NEXT: movl %edx, 8(%edi)
+; CHECK-X86-NEXT: movl %ecx, 4(%edi)
+; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: addl $4, %esp
+; CHECK-X86-NEXT: popl %esi
+; CHECK-X86-NEXT: popl %edi
+; CHECK-X86-NEXT: retl
+;
+; CHECK-MSVC32-LABEL: store:
+; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %edi
+; CHECK-MSVC32-NEXT: pushl %esi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %edi
+; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
+; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
+; CHECK-MSVC32-NEXT: retl
+ store PrimTy %x, ptr %p
+ ret void
+}
+
+; Illustrate stack alignment
+define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind {
+; CHECK-X64-F128-LABEL: store_perturbed:
+; CHECK-X64-F128: # %bb.0:
+; CHECK-X64-F128-NEXT: movaps %xmm0, (%rsi)
+; CHECK-X64-F128-NEXT: retq
+;
+; CHECK-X64-I128-LABEL: store_perturbed:
+; CHECK-X64-I128: # %bb.0:
+; CHECK-X64-I128-NEXT: movq %rdx, 8(%rcx)
+; CHECK-X64-I128-NEXT: movq %rsi, (%rcx)
+; CHECK-X64-I128-NEXT: retq
+;
+; CHECK-MSVC64-F128-LABEL: store_perturbed:
+; CHECK-MSVC64-F128: # %bb.0:
+; CHECK-MSVC64-F128-NEXT: movaps (%rdx), %xmm0
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%r8)
+; CHECK-MSVC64-F128-NEXT: retq
+;
+; CHECK-MSVC64-I128-LABEL: store_perturbed:
+; CHECK-MSVC64-I128: # %bb.0:
+; CHECK-MSVC64-I128-NEXT: movq %r8, 8(%r9)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, (%r9)
+; CHECK-MSVC64-I128-NEXT: retq
+;
+; CHECK-MINGW-F128-LABEL: store_perturbed:
+; CHECK-MINGW-F128: # %bb.0:
+; CHECK-MINGW-F128-NEXT: movaps (%rdx), %xmm0
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%r8)
+; CHECK-MINGW-F128-NEXT: retq
+;
+; CHECK-MINGW-I128-LABEL: store_perturbed:
+; CHECK-MINGW-I128: # %bb.0:
+; CHECK-MINGW-I128-NEXT: movq %r8, 8(%r9)
+; CHECK-MINGW-I128-NEXT: movq %rdx, (%r9)
+; CHECK-MINGW-I128-NEXT: retq
+;
+; CHECK-X86-LABEL: store_perturbed:
+; CHECK-X86: # %bb.0:
+; CHECK-X86-NEXT: pushl %edi
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 32(%esp), %eax
+; CHECK-X86-NEXT: movl 36(%esp), %ecx
+; CHECK-X86-NEXT: movl 40(%esp), %edx
+; CHECK-X86-NEXT: movl 44(%esp), %esi
+; CHECK-X86-NEXT: movl 48(%esp), %edi
+; CHECK-X86-NEXT: movl %esi, 12(%edi)
+; CHECK-X86-NEXT: movl %edx, 8(%edi)
+; CHECK-X86-NEXT: movl %ecx, 4(%edi)
+; CHECK-X86-NEXT: movl %eax, (%edi)
+; CHECK-X86-NEXT: addl $4, %esp
+; CHECK-X86-NEXT: popl %esi
+; CHECK-X86-NEXT: popl %edi
+; CHECK-X86-NEXT: retl
+;
+; CHECK-MSVC32-LABEL: store_perturbed:
+; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %edi
+; CHECK-MSVC32-NEXT: pushl %esi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 28(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 32(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 36(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 40(%ebp), %edi
+; CHECK-MSVC32-NEXT: movl %esi, 12(%edi)
+; CHECK-MSVC32-NEXT: movl %edx, 8(%edi)
+; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi)
+; CHECK-MSVC32-NEXT: movl %eax, (%edi)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
+; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
+; CHECK-MSVC32-NEXT: retl
+ store PrimTy %x, ptr %p
+ ret void
+}
+
define PrimTy @return(ptr %p) nounwind {
; CHECK-X64-F128-LABEL: return:
; CHECK-X64-F128: # %bb.0:
@@ -53,8 +214,8 @@ define PrimTy @return(ptr %p) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-X86-NEXT: movl 12(%esp), %eax
+; CHECK-X86-NEXT: movl 16(%esp), %ecx
; CHECK-X86-NEXT: movl (%ecx), %edx
; CHECK-X86-NEXT: movl 4(%ecx), %esi
; CHECK-X86-NEXT: movl 8(%ecx), %edi
@@ -71,8 +232,8 @@ define PrimTy @return(ptr %p) nounwind {
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-MSVC32-NEXT: movl 12(%esp), %eax
+; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx
; CHECK-MSVC32-NEXT: movl (%ecx), %edx
; CHECK-MSVC32-NEXT: movl 4(%ecx), %esi
; CHECK-MSVC32-NEXT: movl 8(%ecx), %edi
@@ -123,38 +284,47 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 32(%esp), %ecx
+; CHECK-X86-NEXT: movl 36(%esp), %edx
+; CHECK-X86-NEXT: movl 40(%esp), %esi
+; CHECK-X86-NEXT: movl 44(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: first_arg:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 24(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 28(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 32(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 36(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
+; Leading args such that i128 is the last possible position where it still
+; gets passed in registers.
define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounwind {
; CHECK-X64-F128-LABEL: leading_args:
; CHECK-X64-F128: # %bb.0:
@@ -168,64 +338,72 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
;
; CHECK-MSVC64-F128-LABEL: leading_args:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 40(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: leading_args:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 40(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 48(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: leading_args:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 40(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: leading_args:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 40(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 48(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 64(%esp), %ecx
+; CHECK-X86-NEXT: movl 68(%esp), %edx
+; CHECK-X86-NEXT: movl 72(%esp), %esi
+; CHECK-X86-NEXT: movl 76(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: leading_args:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
+; The i128 of interest must be in memory.
define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy %_5, PrimTy %x) nounwind {
; CHECK-X64-F128-LABEL: many_leading_args:
; CHECK-X64-F128: # %bb.0:
@@ -234,66 +412,73 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
;
; CHECK-X64-I128-LABEL: many_leading_args:
; CHECK-X64-I128: # %bb.0:
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT: movq 24(%rsp), %rax
+; CHECK-X64-I128-NEXT: movq 32(%rsp), %rdx
; CHECK-X64-I128-NEXT: retq
;
; CHECK-MSVC64-F128-LABEL: many_leading_args:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 56(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: many_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 64(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 72(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: many_leading_args:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 56(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: many_leading_args:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 64(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 72(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: many_leading_args:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 80(%esp), %ecx
+; CHECK-X86-NEXT: movl 84(%esp), %edx
+; CHECK-X86-NEXT: movl 88(%esp), %esi
+; CHECK-X86-NEXT: movl 92(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: many_leading_args:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 72(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 76(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 80(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 84(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -305,66 +490,73 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
;
; CHECK-X64-I128-LABEL: trailing_arg:
; CHECK-X64-I128: # %bb.0:
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-X64-I128-NEXT: movq 8(%rsp), %rax
+; CHECK-X64-I128-NEXT: movq 16(%rsp), %rdx
; CHECK-X64-I128-NEXT: retq
;
; CHECK-MSVC64-F128-LABEL: trailing_arg:
; CHECK-MSVC64-F128: # %bb.0:
-; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq 48(%rsp), %rax
; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MSVC64-F128-NEXT: retq
;
; CHECK-MSVC64-I128-LABEL: trailing_arg:
; CHECK-MSVC64-I128: # %bb.0:
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MSVC64-I128-NEXT: movq 48(%rsp), %rax
+; CHECK-MSVC64-I128-NEXT: movq 56(%rsp), %rdx
; CHECK-MSVC64-I128-NEXT: retq
;
; CHECK-MINGW-F128-LABEL: trailing_arg:
; CHECK-MINGW-F128: # %bb.0:
-; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq 48(%rsp), %rax
; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0
; CHECK-MINGW-F128-NEXT: retq
;
; CHECK-MINGW-I128-LABEL: trailing_arg:
; CHECK-MINGW-I128: # %bb.0:
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-MINGW-I128-NEXT: movq 48(%rsp), %rax
+; CHECK-MINGW-I128-NEXT: movq 56(%rsp), %rdx
; CHECK-MINGW-I128-NEXT: retq
;
; CHECK-X86-LABEL: trailing_arg:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: pushl %edi
; CHECK-X86-NEXT: pushl %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl 64(%esp), %ecx
+; CHECK-X86-NEXT: movl 68(%esp), %edx
+; CHECK-X86-NEXT: movl 72(%esp), %esi
+; CHECK-X86-NEXT: movl 76(%esp), %edi
; CHECK-X86-NEXT: movl %edi, 12(%eax)
; CHECK-X86-NEXT: movl %esi, 8(%eax)
; CHECK-X86-NEXT: movl %edx, 4(%eax)
; CHECK-X86-NEXT: movl %ecx, (%eax)
+; CHECK-X86-NEXT: addl $4, %esp
; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: popl %edi
; CHECK-X86-NEXT: retl $4
;
; CHECK-MSVC32-LABEL: trailing_arg:
; CHECK-MSVC32: # %bb.0:
+; CHECK-MSVC32-NEXT: pushl %ebp
+; CHECK-MSVC32-NEXT: movl %esp, %ebp
; CHECK-MSVC32-NEXT: pushl %edi
; CHECK-MSVC32-NEXT: pushl %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-MSVC32-NEXT: andl $-16, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi
; CHECK-MSVC32-NEXT: movl %edi, 12(%eax)
; CHECK-MSVC32-NEXT: movl %esi, 8(%eax)
; CHECK-MSVC32-NEXT: movl %edx, 4(%eax)
; CHECK-MSVC32-NEXT: movl %ecx, (%eax)
+; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp
; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %edi
+; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
ret PrimTy %x
}
@@ -388,8 +580,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $56, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 32(%rsp), %rcx
; CHECK-MSVC64-F128-NEXT: callq first_arg
; CHECK-MSVC64-F128-NEXT: addq $56, %rsp
; CHECK-MSVC64-F128-NEXT: retq
@@ -405,8 +597,8 @@ define void @call_first_arg(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $56, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 32(%rsp), %rcx
; CHECK-MINGW-F128-NEXT: callq first_arg
; CHECK-MINGW-F128-NEXT: addq $56, %rsp
; CHECK-MINGW-F128-NEXT: retq
@@ -420,32 +612,43 @@ define void @call_first_arg(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_first_arg:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $56, %esp
+; CHECK-X86-NEXT: movl 64(%esp), %eax
+; CHECK-X86-NEXT: movl 68(%esp), %ecx
+; CHECK-X86-NEXT: movl 72(%esp), %edx
+; CHECK-X86-NEXT: movl 76(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 28(%esp)
+; CHECK-X86-NEXT: movl %edx, 24(%esp)
+; CHECK-X86-NEXT: movl %ecx, 20(%esp)
+; CHECK-X86-NEXT: movl %eax, 16(%esp)
+; CHECK-X86-NEXT: leal 32(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
; CHECK-X86-NEXT: calll first_arg@PLT
-; CHECK-X86-NEXT: addl $56, %esp
+; CHECK-X86-NEXT: addl $52, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_first_arg:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $64, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 28(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 24(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 20(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 16(%esp)
+; CHECK-MSVC32-NEXT: leal 32(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
; CHECK-MSVC32-NEXT: calll _first_arg
-; CHECK-MSVC32-NEXT: addl $20, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @first_arg(PrimTy %x)
@@ -481,9 +684,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $72, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -495,8 +698,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -509,9 +712,9 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $72, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -523,8 +726,8 @@ define void @call_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_leading_args:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $56, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -535,48 +738,59 @@ define void @call_leading_args(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_leading_args:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $88, %esp
+; CHECK-X86-NEXT: movl 96(%esp), %eax
+; CHECK-X86-NEXT: movl 100(%esp), %ecx
+; CHECK-X86-NEXT: movl 104(%esp), %edx
+; CHECK-X86-NEXT: movl 108(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 60(%esp)
+; CHECK-X86-NEXT: movl %edx, 56(%esp)
+; CHECK-X86-NEXT: movl %ecx, 52(%esp)
+; CHECK-X86-NEXT: movl %eax, 48(%esp)
+; CHECK-X86-NEXT: leal 64(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll leading_args@PLT
-; CHECK-X86-NEXT: addl $88, %esp
+; CHECK-X86-NEXT: addl $84, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_leading_args:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $96, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT: leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _leading_args
-; CHECK-MSVC32-NEXT: addl $52, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy %x)
@@ -620,12 +834,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-F128-NEXT: subq $88, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
; CHECK-MSVC64-F128-NEXT: xorps %xmm1, %xmm1
-; CHECK-MSVC64-F128-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm1, 64(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 64(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -637,10 +851,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_many_leading_args:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $72, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 56(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 48(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq $0, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq $0, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -654,12 +868,12 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-F128-NEXT: subq $88, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
; CHECK-MINGW-F128-NEXT: xorps %xmm1, %xmm1
-; CHECK-MINGW-F128-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm1, 64(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 64(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -671,10 +885,10 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_many_leading_args:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $72, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 56(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 48(%rsp)
+; CHECK-MINGW-I128-NEXT: movq $0, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq $0, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -685,56 +899,67 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_many_leading_args:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $104, %esp
+; CHECK-X86-NEXT: movl 112(%esp), %eax
+; CHECK-X86-NEXT: movl 116(%esp), %ecx
+; CHECK-X86-NEXT: movl 120(%esp), %edx
+; CHECK-X86-NEXT: movl 124(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 76(%esp)
+; CHECK-X86-NEXT: movl %edx, 72(%esp)
+; CHECK-X86-NEXT: movl %ecx, 68(%esp)
+; CHECK-X86-NEXT: movl %eax, 64(%esp)
+; CHECK-X86-NEXT: leal 80(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 60(%esp)
+; CHECK-X86-NEXT: movl $0, 56(%esp)
+; CHECK-X86-NEXT: movl $0, 52(%esp)
+; CHECK-X86-NEXT: movl $0, 48(%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll many_leading_args@PLT
-; CHECK-X86-NEXT: addl $104, %esp
+; CHECK-X86-NEXT: addl $100, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_many_leading_args:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $112, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 76(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 72(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 68(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 64(%esp)
+; CHECK-MSVC32-NEXT: leal 80(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 60(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 56(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 52(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 48(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _many_leading_args
-; CHECK-MSVC32-NEXT: addl $68, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @many_leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy Prim0, PrimTy %x)
@@ -770,9 +995,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-F128: # %bb.0:
; CHECK-MSVC64-F128-NEXT: subq $72, %rsp
; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d
@@ -784,8 +1009,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MSVC64-I128-LABEL: call_trailing_arg:
; CHECK-MSVC64-I128: # %bb.0:
; CHECK-MSVC64-I128-NEXT: subq $56, %rsp
-; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MSVC64-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx
; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx
; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d
@@ -798,9 +1023,9 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MINGW-F128: # %bb.0:
; CHECK-MINGW-F128-NEXT: subq $72, %rsp
; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0
-; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp)
+; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp)
; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-F128-NEXT: xorl %edx, %edx
; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d
@@ -812,8 +1037,8 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
; CHECK-MINGW-I128-LABEL: call_trailing_arg:
; CHECK-MINGW-I128: # %bb.0:
; CHECK-MINGW-I128-NEXT: subq $56, %rsp
-; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rdx, 40(%rsp)
+; CHECK-MINGW-I128-NEXT: movq %rcx, 32(%rsp)
; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx
; CHECK-MINGW-I128-NEXT: xorl %edx, %edx
; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d
@@ -824,48 +1049,59 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
;
; CHECK-X86-LABEL: call_trailing_arg:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: subl $40, %esp
-; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl $0
-; CHECK-X86-NEXT: pushl %eax
+; CHECK-X86-NEXT: pushl %esi
+; CHECK-X86-NEXT: subl $88, %esp
+; CHECK-X86-NEXT: movl 96(%esp), %eax
+; CHECK-X86-NEXT: movl 100(%esp), %ecx
+; CHECK-X86-NEXT: movl 104(%esp), %edx
+; CHECK-X86-NEXT: movl 108(%esp), %esi
+; CHECK-X86-NEXT: movl %esi, 60(%esp)
+; CHECK-X86-NEXT: movl %edx, 56(%esp)
+; CHECK-X86-NEXT: movl %ecx, 52(%esp)
+; CHECK-X86-NEXT: movl %eax, 48(%esp)
+; CHECK-X86-NEXT: leal 64(%esp), %eax
+; CHECK-X86-NEXT: movl %eax, (%esp)
+; CHECK-X86-NEXT: movl $0, 32(%esp)
+; CHECK-X86-NEXT: movl $0, 28(%esp)
+; CHECK-X86-NEXT: movl $0, 24(%esp)
+; CHECK-X86-NEXT: movl $0, 20(%esp)
+; CHECK-X86-NEXT: movl $0, 16(%esp)
+; CHECK-X86-NEXT: movl $0, 12(%esp)
+; CHECK-X86-NEXT: movl $0, 8(%esp)
+; CHECK-X86-NEXT: movl $0, 4(%esp)
; CHECK-X86-NEXT: calll trailing_arg@PLT
-; CHECK-X86-NEXT: addl $88, %esp
+; CHECK-X86-NEXT: addl $84, %esp
+; CHECK-X86-NEXT: popl %esi
; CHECK-X86-NEXT: retl
;
; CHECK-MSVC32-LABEL: call_trailing_arg:
; CHECK-MSVC32: # %bb.0:
; CHECK-MSVC32-NEXT: pushl %ebp
; CHECK-MSVC32-NEXT: movl %esp, %ebp
+; CHECK-MSVC32-NEXT: pushl %esi
; CHECK-MSVC32-NEXT: andl $-16, %esp
-; CHECK-MSVC32-NEXT: subl $32, %esp
-; CHECK-MSVC32-NEXT: movl %esp, %eax
-; CHECK-MSVC32-NEXT: pushl 20(%ebp)
-; CHECK-MSVC32-NEXT: pushl 16(%ebp)
-; CHECK-MSVC32-NEXT: pushl 12(%ebp)
-; CHECK-MSVC32-NEXT: pushl 8(%ebp)
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl $0
-; CHECK-MSVC32-NEXT: pushl %eax
+; CHECK-MSVC32-NEXT: subl $96, %esp
+; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax
+; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx
+; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx
+; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi
+; CHECK-MSVC32-NEXT: movl %esi, 60(%esp)
+; CHECK-MSVC32-NEXT: movl %edx, 56(%esp)
+; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp)
+; CHECK-MSVC32-NEXT: movl %eax, 48(%esp)
+; CHECK-MSVC32-NEXT: leal 64(%esp), %eax
+; CHECK-MSVC32-NEXT: movl %eax, (%esp)
+; CHECK-MSVC32-NEXT: movl $0, 32(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 28(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 24(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 20(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 16(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 12(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 8(%esp)
+; CHECK-MSVC32-NEXT: movl $0, 4(%esp)
; CHECK-MSVC32-NEXT: calll _trailing_arg
-; CHECK-MSVC32-NEXT: addl $52, %esp
-; CHECK-MSVC32-NEXT: movl %ebp, %esp
+; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp
+; CHECK-MSVC32-NEXT: popl %esi
; CHECK-MSVC32-NEXT: popl %ebp
; CHECK-MSVC32-NEXT: retl
call PrimTy @trailing_arg(i64 0, i64 0, i64 0, i64 0, PrimTy %x)
diff --git a/llvm/test/CodeGen/X86/i128-sdiv.ll b/llvm/test/CodeGen/X86/i128-sdiv.ll
index 717f52f198ee..7d5757392c98 100644
--- a/llvm/test/CodeGen/X86/i128-sdiv.ll
+++ b/llvm/test/CodeGen/X86/i128-sdiv.ll
@@ -8,18 +8,21 @@
define i128 @test1(i128 %x) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl $30, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl 28(%ebp), %esi
+; X86-NEXT: adcl 32(%ebp), %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: shrdl $2, %ecx, %edx
; X86-NEXT: movl %ecx, %esi
@@ -29,8 +32,10 @@ define i128 @test1(i128 %x) nounwind {
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test1:
@@ -52,38 +57,44 @@ define i128 @test1(i128 %x) nounwind {
define i128 @test2(i128 %x) nounwind {
; X86-LABEL: test2:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: shrl $30, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: shrdl $2, %edx, %ecx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: adcl 28(%ebp), %edx
+; X86-NEXT: adcl 32(%ebp), %ecx
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: shrdl $2, %eax, %ecx
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: sarl $2, %edx
-; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: sarl $2, %eax
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/i128-udiv.ll b/llvm/test/CodeGen/X86/i128-udiv.ll
index 3f890b7f2443..901183242132 100644
--- a/llvm/test/CodeGen/X86/i128-udiv.ll
+++ b/llvm/test/CodeGen/X86/i128-udiv.ll
@@ -8,15 +8,21 @@
define i128 @test1(i128 %x) nounwind {
; X86-LABEL: test1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %edx
; X86-NEXT: shrdl $2, %edx, %ecx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index 55c318e87a5a..bdceeefbcfab 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -123,31 +123,34 @@ define i64 @test_i64(i64 %a) nounwind {
define i128 @test_i128(i128 %a) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: test_i128:
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
index c52b3ed6c926..4a6c1d0ae5de 100644
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -10,33 +10,39 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %esi
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: addl $1, %edi
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: addl $1, %esi
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: orl %ecx, %ebx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: orl %edx, %ebp
-; X86-NEXT: orl %ecx, %ebp
-; X86-NEXT: shrdl $28, %ebx, %ebp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: shrdl $28, %ebx, %esi
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %exit
-; X86-NEXT: movl %edi, (%eax)
-; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -73,15 +79,21 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: orl 20(%ebp), %ecx
+; X86-NEXT: movl 8(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: sete %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
@@ -98,15 +110,21 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: orl 20(%ebp), %ecx
+; X86-NEXT: movl 8(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: setne %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
@@ -123,13 +141,19 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: shll $17, %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl 8(%ebp), %eax
+; X86-NEXT: orl 12(%ebp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
@@ -146,13 +170,19 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %ecx
; X86-NEXT: shll $17, %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl 8(%ebp), %eax
+; X86-NEXT: orl 12(%ebp), %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: setne %al
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
@@ -170,13 +200,17 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
@@ -194,9 +228,11 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
index 1f3a458ecf3a..17065a4a61c2 100644
--- a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
+++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll
@@ -1,10 +1,10 @@
; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s
-; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles!
+; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo, bar, baz
declare void @g()
define void @f(i32 %arg) {
- call void @g() [ "foo"(i32 %arg) ]
+ call void @g() [ "foo"(i32 %arg), "bar"(i32 %arg), "baz"(i32 %arg) ]
ret void
}
diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-callbr.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-callbr.ll
index 56157d205b1c..79bddc075541 100644
--- a/llvm/test/CodeGen/X86/invalid-operand-bundle-callbr.ll
+++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-callbr.ll
@@ -1,6 +1,6 @@
; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s
-; CHECK: LLVM ERROR: cannot lower callbrs with arbitrary operand bundles!
+; CHECK: LLVM ERROR: cannot lower callbrs with arbitrary operand bundles: foo
define void @f(i32 %arg) {
callbr void asm "", ""() [ "foo"(i32 %arg) ]
diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-invoke.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-invoke.ll
index 8091a220a44c..1da41aeab68b 100644
--- a/llvm/test/CodeGen/X86/invalid-operand-bundle-invoke.ll
+++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-invoke.ll
@@ -1,6 +1,6 @@
; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s
-; CHECK: LLVM ERROR: cannot lower invokes with arbitrary operand bundles!
+; CHECK: LLVM ERROR: cannot lower invokes with arbitrary operand bundles: foo
declare void @g()
declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/CodeGen/X86/kcfi.ll b/llvm/test/CodeGen/X86/kcfi.ll
index 059efcc71b0e..fd93b8e3d418 100644
--- a/llvm/test/CodeGen/X86/kcfi.ll
+++ b/llvm/test/CodeGen/X86/kcfi.ll
@@ -138,6 +138,29 @@ define void @f8() {
ret void
}
+declare i32 @__gxx_personality_v0(...)
+
+define void @f9() personality ptr @__gxx_personality_v0 {
+; MIR-LABEL: name: f9
+; MIR: body:
+; ISEL: CALL64m killed %0, 1, $noreg, 0, $noreg, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, cfi-type 12345678
+; KCFI: $r11 = MOV64rm killed renamable $rax, 1, $noreg, 0, $noreg
+; KCFI-NEXT: BUNDLE{{.*}} {
+; KCFI-NEXT: KCFI_CHECK $r11, 12345678, implicit-def $r10, implicit-def $r11, implicit-def $eflags
+; KCFI-NEXT: CALL64r internal $r11, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+; KCFI-NEXT: }
+ %1 = load ptr, ptr @g, align 8
+ invoke void %1() [ "kcfi"(i32 12345678) ]
+ to label %cont
+ unwind label %err
+cont:
+ ret void
+err:
+ %exn = landingpad { i8*, i32 }
+ catch i8* null
+ resume { i8*, i32 } %exn
+}
+
attributes #0 = { "target-features"="+retpoline-indirect-branches,+retpoline-indirect-calls" }
!llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/X86/ldexp-f80.ll b/llvm/test/CodeGen/X86/ldexp-f80.ll
index 3a10eab2f47c..84d18d5888cf 100644
--- a/llvm/test/CodeGen/X86/ldexp-f80.ll
+++ b/llvm/test/CodeGen/X86/ldexp-f80.ll
@@ -3,33 +3,29 @@
; FIXME: Expansion without libcall
; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s
-define x86_fp80 @ldexp_f80(x86_fp80 %arg0, i32 %arg1) {
+define x86_fp80 @ldexp_f80(x86_fp80 %arg0, i32 %arg1) nounwind {
; X64-LABEL: ldexp_f80:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq ldexpl@PLT
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%ldexp = call x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %arg0, i32 %arg1)
ret x86_fp80 %ldexp
}
-define x86_fp80 @test_strict_ldexp_f80_i32(ptr addrspace(1) %out, x86_fp80 %a, i32 %b) #2 {
+define x86_fp80 @test_strict_ldexp_f80_i32(ptr addrspace(1) %out, x86_fp80 %a, i32 %b) nounwind #2 {
; X64-LABEL: test_strict_ldexp_f80_i32:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movl %esi, %edi
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: wait
; X64-NEXT: callq ldexpl@PLT
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80 %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret x86_fp80 %result
diff --git a/llvm/test/CodeGen/X86/ldexp-libcall.ll b/llvm/test/CodeGen/X86/ldexp-libcall.ll
index 3aec4960383f..74256c801d02 100644
--- a/llvm/test/CodeGen/X86/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/X86/ldexp-libcall.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
-define float @call_ldexpf(float %a, i32 %b) {
+define float @call_ldexpf(float %a, i32 %b) nounwind {
; CHECK-LABEL: call_ldexpf:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp ldexpf@PLT # TAILCALL
@@ -21,7 +21,7 @@ define float @call_ldexpf(float %a, i32 %b) {
ret float %result
}
-define double @call_ldexp(double %a, i32 %b) {
+define double @call_ldexp(double %a, i32 %b) nounwind {
; CHECK-LABEL: call_ldexp:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp ldexp@PLT # TAILCALL
@@ -40,16 +40,14 @@ define double @call_ldexp(double %a, i32 %b) {
ret double %result
}
-define x86_fp80 @call_ldexpl(x86_fp80 %a, i32 %b) {
+define x86_fp80 @call_ldexpl(x86_fp80 %a, i32 %b) nounwind {
; CHECK-LABEL: call_ldexpl:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq ldexpl@PLT
; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_ldexpl:
diff --git a/llvm/test/CodeGen/X86/ldexp-not-readonly.ll b/llvm/test/CodeGen/X86/ldexp-not-readonly.ll
index aec777375925..9a67cf8b3171 100644
--- a/llvm/test/CodeGen/X86/ldexp-not-readonly.ll
+++ b/llvm/test/CodeGen/X86/ldexp-not-readonly.ll
@@ -2,14 +2,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
-define float @call_ldexpf(float %a, i32 %b) {
+define float @call_ldexpf(float %a, i32 %b) nounwind {
; CHECK-LABEL: call_ldexpf:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexpf@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_ldexpf:
@@ -26,14 +24,12 @@ define float @call_ldexpf(float %a, i32 %b) {
ret float %result
}
-define double @call_ldexp(double %a, i32 %b) {
+define double @call_ldexp(double %a, i32 %b) nounwind {
; CHECK-LABEL: call_ldexp:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexp@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: call_ldexp:
diff --git a/llvm/test/CodeGen/X86/ldexp-strict.ll b/llvm/test/CodeGen/X86/ldexp-strict.ll
index 67e348cb95ff..f13c59da46c2 100644
--- a/llvm/test/CodeGen/X86/ldexp-strict.ll
+++ b/llvm/test/CodeGen/X86/ldexp-strict.ll
@@ -4,45 +4,40 @@
; FIXME: Expansion support without libcalls
; FIXME: Implement f16->f32 promotion for strictfp
-; define half @test_strict_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #2 {
+; define half @test_strict_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) nounwind #2 {
; %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret half %result
; }
-define float @test_strict_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #2 {
+define float @test_strict_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) nounwind #2 {
; X64-LABEL: test_strict_ldexp_f32_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: movl %esi, %edi
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: popq %rax
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret float %result
}
-define double @test_strict_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #2 {
+define double @test_strict_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) nounwind #2 {
; X64-LABEL: test_strict_ldexp_f64_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: movl %esi, %edi
; X64-NEXT: callq ldexp@PLT
; X64-NEXT: popq %rax
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call double @llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret double %result
}
-define <2 x float> @test_strict_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) #2 {
+define <2 x float> @test_strict_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) nounwind #2 {
; X64-LABEL: test_strict_ldexp_v2f32_v2i32:
; X64: # %bb.0:
; X64-NEXT: subq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 64
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movd %xmm1, %edi
@@ -58,7 +53,6 @@ define <2 x float> @test_strict_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x fl
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x float> %result
diff --git a/llvm/test/CodeGen/X86/ldexp-wrong-signature.ll b/llvm/test/CodeGen/X86/ldexp-wrong-signature.ll
index ac58bb53b174..b4ba53f80ad5 100644
--- a/llvm/test/CodeGen/X86/ldexp-wrong-signature.ll
+++ b/llvm/test/CodeGen/X86/ldexp-wrong-signature.ll
@@ -2,14 +2,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
-define float @ldexpf_too_many_args(float %a, i32 %b, i32 %c) {
+define float @ldexpf_too_many_args(float %a, i32 %b, i32 %c) nounwind {
; CHECK-LABEL: ldexpf_too_many_args:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexpf@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: ldexpf_too_many_args:
@@ -28,14 +26,12 @@ define float @ldexpf_too_many_args(float %a, i32 %b, i32 %c) {
ret float %result
}
-define float @ldexp_wrong_fp_type(float %a, i32 %b) {
+define float @ldexp_wrong_fp_type(float %a, i32 %b) nounwind {
; CHECK-LABEL: ldexp_wrong_fp_type:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexp@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: ldexp_wrong_fp_type:
diff --git a/llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll b/llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll
index ac79973106ac..d48c1c2e0a9a 100644
--- a/llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll
+++ b/llvm/test/CodeGen/X86/ldexp-wrong-signature2.ll
@@ -2,14 +2,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
-define i32 @ldexpf_not_fp(i32 %a, i32 %b) {
+define i32 @ldexpf_not_fp(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: ldexpf_not_fp:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexpf@PLT
; CHECK-NEXT: popq %rcx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: ldexpf_not_fp:
@@ -23,14 +21,12 @@ define i32 @ldexpf_not_fp(i32 %a, i32 %b) {
ret i32 %result
}
-define float @ldexp_not_int(float %a, float %b) {
+define float @ldexp_not_int(float %a, float %b) nounwind {
; CHECK-LABEL: ldexp_not_int:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq ldexp@PLT
; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; CHECK-WIN-LABEL: ldexp_not_int:
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index 859139463b7e..59ec7bfcaa91 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=x86_64-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefixes=WIN64 %s
; RUN: llc -mtriple=i386-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefix=WIN32 %s
-define float @ldexp_f32(i8 zeroext %x) {
+define float @ldexp_f32(i8 zeroext %x) nounwind {
; X64-LABEL: ldexp_f32:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
@@ -12,17 +12,12 @@ define float @ldexp_f32(i8 zeroext %x) {
; WIN64-LABEL: ldexp_f32:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $40, %rsp
-; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movzbl %cl, %edx
; WIN64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; WIN64-NEXT: callq ldexp
; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $40, %rsp
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_f32:
; WIN32: # %bb.0:
@@ -41,7 +36,7 @@ define float @ldexp_f32(i8 zeroext %x) {
ret float %ldexp
}
-define double @ldexp_f64(i8 zeroext %x) {
+define double @ldexp_f64(i8 zeroext %x) nounwind {
; X64-LABEL: ldexp_f64:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
@@ -68,11 +63,10 @@ define double @ldexp_f64(i8 zeroext %x) {
ret double %ldexp
}
-define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
+define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) nounwind {
; X64-LABEL: ldexp_v2f32:
; X64: # %bb.0:
; X64-NEXT: subq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 64
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movd %xmm1, %edi
@@ -88,22 +82,15 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN64-LABEL: ldexp_v2f32:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsi
-; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: subq $80, %rsp
-; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm8, 64
; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm7, 48
; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm6, 32
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movq %rdx, %rsi
; WIN64-NEXT: movaps (%rcx), %xmm7
; WIN64-NEXT: movl 12(%rdx), %edx
@@ -140,12 +127,9 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rsi
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_v2f32:
; WIN32: # %bb.0:
@@ -172,11 +156,10 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
ret <2 x float> %1
}
-define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
+define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind {
; X64-LABEL: ldexp_v4f32:
; X64: # %bb.0:
; X64-NEXT: subq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 80
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
@@ -210,22 +193,15 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; X64-NEXT: # xmm1 = xmm1[0],mem[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN64-LABEL: ldexp_v4f32:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsi
-; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: subq $80, %rsp
-; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm8, 64
; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm7, 48
; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm6, 32
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movq %rdx, %rsi
; WIN64-NEXT: movaps (%rcx), %xmm7
; WIN64-NEXT: movl 12(%rdx), %edx
@@ -262,12 +238,9 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rsi
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_v4f32:
; WIN32: # %bb.0:
@@ -324,11 +297,10 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
ret <4 x float> %1
}
-define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) {
+define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) nounwind {
; X64-LABEL: ldexp_v2f64:
; X64: # %bb.0:
; X64-NEXT: subq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 64
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movd %xmm1, %edi
@@ -344,20 +316,14 @@ define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) {
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $56, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN64-LABEL: ldexp_v2f64:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsi
-; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: subq $64, %rsp
-; WIN64-NEXT: .seh_stackalloc 64
; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm7, 48
; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm6, 32
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movaps (%rcx), %xmm6
; WIN64-NEXT: movl (%rdx), %eax
; WIN64-NEXT: movl 4(%rdx), %esi
@@ -373,12 +339,9 @@ define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) {
; WIN64-NEXT: movaps %xmm7, %xmm0
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $64, %rsp
; WIN64-NEXT: popq %rsi
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_v2f64:
; WIN32: # %bb.0:
@@ -406,17 +369,12 @@ define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) {
ret <2 x double> %1
}
-define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) {
+define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) nounwind {
; X64-LABEL: ldexp_v4f64:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: subq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 96
-; X64-NEXT: .cfi_offset %rbx, -24
-; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
@@ -449,30 +407,19 @@ define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) {
; X64-NEXT: # xmm1 = xmm1[0],mem[0]
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: addq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %rbp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN64-LABEL: ldexp_v4f64:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsi
-; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
-; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: pushq %rbx
-; WIN64-NEXT: .seh_pushreg %rbx
; WIN64-NEXT: subq $80, %rsp
-; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm8, 64
; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm7, 48
; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; WIN64-NEXT: .seh_savexmm %xmm6, 32
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl 12(%r8), %esi
; WIN64-NEXT: movl 8(%r8), %edi
; WIN64-NEXT: movaps (%rdx), %xmm6
@@ -501,14 +448,11 @@ define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) {
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_v4f64:
; WIN32: # %bb.0:
@@ -565,41 +509,31 @@ define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) {
ret <4 x double> %1
}
-define half @ldexp_f16(half %arg0, i32 %arg1) {
+define half @ldexp_f16(half %arg0, i32 %arg1) nounwind {
; X64-LABEL: ldexp_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: movl %ebx, %edi
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: popq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN64-LABEL: ldexp_f16:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsi
-; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: subq $32, %rsp
-; WIN64-NEXT: .seh_stackalloc 32
-; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl %edx, %esi
; WIN64-NEXT: callq __extendhfsf2
; WIN64-NEXT: cvtss2sd %xmm0, %xmm0
; WIN64-NEXT: movl %esi, %edx
; WIN64-NEXT: callq ldexp
; WIN64-NEXT: callq __truncdfhf2
-; WIN64-NEXT: nop
-; WIN64-NEXT: .seh_startepilogue
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsi
-; WIN64-NEXT: .seh_endepilogue
; WIN64-NEXT: retq
-; WIN64-NEXT: .seh_endproc
;
; WIN32-LABEL: ldexp_f16:
; WIN32: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/lea-2.ll b/llvm/test/CodeGen/X86/lea-2.ll
index a48c02ff3e0b..0883a8e726e2 100644
--- a/llvm/test/CodeGen/X86/lea-2.ll
+++ b/llvm/test/CodeGen/X86/lea-2.ll
@@ -2,7 +2,6 @@
; RUN: llc < %s -mtriple=i686-linux | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s --check-prefix=X64
; The computation of %t4 should match a single lea, without using actual add instructions.
diff --git a/llvm/test/CodeGen/X86/lea-3.ll b/llvm/test/CodeGen/X86/lea-3.ll
index b7f1c4ae1154..2cbefc0689c1 100644
--- a/llvm/test/CodeGen/X86/lea-3.ll
+++ b/llvm/test/CodeGen/X86/lea-3.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=WIN32
define i64 @test2(i64 %a) {
diff --git a/llvm/test/CodeGen/X86/lea-4.ll b/llvm/test/CodeGen/X86/lea-4.ll
index e1f0b73c33ff..c33697e0abf3 100644
--- a/llvm/test/CodeGen/X86/lea-4.ll
+++ b/llvm/test/CodeGen/X86/lea-4.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
define zeroext i16 @t1(i32 %on_off) nounwind {
; CHECK-LABEL: t1:
diff --git a/llvm/test/CodeGen/X86/lea-5.ll b/llvm/test/CodeGen/X86/lea-5.ll
index 908ec3eae7f6..39051eac45d7 100644
--- a/llvm/test/CodeGen/X86/lea-5.ll
+++ b/llvm/test/CodeGen/X86/lea-5.ll
@@ -4,7 +4,6 @@
; RUN: llc < %s -mtriple=x86_64-linux -O2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O2 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-nacl -O2 | FileCheck %s -check-prefix=X32
; Function Attrs: nounwind readnone uwtable
define void @foo(i32 %x, i32 %d) #0 {
diff --git a/llvm/test/CodeGen/X86/lea.ll b/llvm/test/CodeGen/X86/lea.ll
index 33d121f6849b..28c66b94a69e 100644
--- a/llvm/test/CodeGen/X86/lea.ll
+++ b/llvm/test/CodeGen/X86/lea.ll
@@ -2,7 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=LINUX
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefixes=WIN
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefixes=LINUX
-; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s --check-prefixes=LINUX
define i32 @test1(i32 %x) nounwind {
; LINUX-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
index 92d8c53c8182..61e2bba82201 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
@@ -3,52 +3,46 @@
; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s
; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=X64 %s
-define { x86_fp80, i32 } @test_frexp_f80_i32(x86_fp80 %a) {
+define { x86_fp80, i32 } @test_frexp_f80_i32(x86_fp80 %a) nounwind {
; X64-LABEL: test_frexp_f80_i32:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpl@PLT
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
ret { x86_fp80, i32 } %result
}
-define x86_fp80 @test_frexp_f80_i32_only_use_fract(x86_fp80 %a) {
+define x86_fp80 @test_frexp_f80_i32_only_use_fract(x86_fp80 %a) nounwind {
; X64-LABEL: test_frexp_f80_i32_only_use_fract:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpl@PLT
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
%result.0 = extractvalue { x86_fp80, i32 } %result, 0
ret x86_fp80 %result.0
}
-define x86_fp80 @test_frexp_f80_i32_only_use_fract_math(x86_fp80 %a) {
+define x86_fp80 @test_frexp_f80_i32_only_use_fract_math(x86_fp80 %a) nounwind {
; X64-LABEL: test_frexp_f80_i32_only_use_fract_math:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpl@PLT
; X64-NEXT: fadd %st, %st(0)
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
%result.0 = extractvalue { x86_fp80, i32 } %result, 0
@@ -56,11 +50,10 @@ define x86_fp80 @test_frexp_f80_i32_only_use_fract_math(x86_fp80 %a) {
ret x86_fp80 %add
}
-define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) {
+define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) nounwind {
; X64-LABEL: test_frexp_f80_i32_only_use_exp:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
@@ -68,7 +61,6 @@ define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) {
; X64-NEXT: fstp %st(0)
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
%result.0 = extractvalue { x86_fp80, i32 } %result, 1
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index 8436c1052552..83840dd85c53 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -2,11 +2,10 @@
; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s
; RUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s
-define { half, i32 } @test_frexp_f16_i32(half %a) {
+define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; X64-LABEL: test_frexp_f16_i32:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -36,7 +35,6 @@ define { half, i32 } @test_frexp_f16_i32(half %a) {
; X64-NEXT: cmovbel %edx, %ecx
; X64-NEXT: pinsrw $0, %ecx, %xmm0
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f16_i32:
@@ -63,11 +61,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) {
ret { half, i32 } %result
}
-define half @test_frexp_f16_i32_only_use_fract(half %a) {
+define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; X64-LABEL: test_frexp_f16_i32_only_use_fract:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -87,7 +84,6 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: pinsrw $0, %eax, %xmm0
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f16_i32_only_use_fract:
@@ -111,11 +107,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) {
ret half %result.0
}
-define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
+define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; X64-LABEL: test_frexp_f16_i32_only_use_exp:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -138,7 +133,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; X64-NEXT: cmpl $33792, %ecx # imm = 0x8400
; X64-NEXT: cmoval %edx, %eax
; X64-NEXT: addq $24, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f16_i32_only_use_exp:
@@ -161,33 +155,31 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
}
; FIXME
-; define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
+; define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind {
; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
; ret { <2 x half>, <2 x i32> } %result
; }
-; define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) {
+; define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind {
; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
; %result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0
; ret <2 x half> %result.0
; }
-; define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
+; define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
; %result.1 = extractvalue { <2 x half>, <2 x i32> } %result, 1
; ret <2 x i32> %result.1
; }
-define { float, i32 } @test_frexp_f32_i32(float %a) {
+define { float, i32 } @test_frexp_f32_i32(float %a) nounwind {
; X64-LABEL: test_frexp_f32_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f32_i32:
@@ -207,15 +199,13 @@ define { float, i32 } @test_frexp_f32_i32(float %a) {
ret { float, i32 } %result
}
-define float @test_frexp_f32_i32_only_use_fract(float %a) {
+define float @test_frexp_f32_i32_only_use_fract(float %a) nounwind {
; X64-LABEL: test_frexp_f32_i32_only_use_fract:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
; X64-NEXT: popq %rax
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f32_i32_only_use_fract:
@@ -235,16 +225,14 @@ define float @test_frexp_f32_i32_only_use_fract(float %a) {
ret float %result.0
}
-define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
+define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; X64-LABEL: test_frexp_f32_i32_only_use_exp:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f32_i32_only_use_exp:
@@ -265,28 +253,27 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
}
; FIXME: Widen vector result
-; define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) {
+; define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwind {
; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a)
; ret { <2 x float>, <2 x i32> } %result
; }
-; define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) {
+; define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) nounwind {
; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a)
; %result.0 = extractvalue { <2 x float>, <2 x i32> } %result, 0
; ret <2 x float> %result.0
; }
-; define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) {
+; define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind {
; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a)
; %result.1 = extractvalue { <2 x float>, <2 x i32> } %result, 1
; ret <2 x i32> %result.1
; }
-define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
+define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwind {
; X64-LABEL: test_frexp_v4f32_v4i32:
; X64: # %bb.0:
; X64-NEXT: subq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 80
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X64-NEXT: movq %rsp, %rdi
@@ -320,7 +307,6 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: addq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_v4f32_v4i32:
@@ -372,11 +358,10 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
ret { <4 x float>, <4 x i32> } %result
}
-define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) {
+define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwind {
; X64-LABEL: test_frexp_v4f32_v4i32_only_use_fract:
; X64: # %bb.0:
; X64-NEXT: subq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 80
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
@@ -403,7 +388,6 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) {
; X64-NEXT: # xmm1 = xmm1[0],mem[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $72, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_fract:
@@ -456,11 +440,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) {
ret <4 x float> %result.0
}
-define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
+define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; X64-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; X64: # %bb.0:
; X64-NEXT: subq $40, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 48
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X64-NEXT: movq %rsp, %rdi
@@ -484,7 +467,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: addq $40, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
@@ -524,16 +506,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
ret <4 x i32> %result.1
}
-define { double, i32 } @test_frexp_f64_i32(double %a) {
+define { double, i32 } @test_frexp_f64_i32(double %a) nounwind {
; X64-LABEL: test_frexp_f64_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexp@PLT
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f64_i32:
@@ -551,15 +531,13 @@ define { double, i32 } @test_frexp_f64_i32(double %a) {
ret { double, i32 } %result
}
-define double @test_frexp_f64_i32_only_use_fract(double %a) {
+define double @test_frexp_f64_i32_only_use_fract(double %a) nounwind {
; X64-LABEL: test_frexp_f64_i32_only_use_fract:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexp@PLT
; X64-NEXT: popq %rax
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f64_i32_only_use_fract:
@@ -577,16 +555,14 @@ define double @test_frexp_f64_i32_only_use_fract(double %a) {
ret double %result.0
}
-define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
+define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; X64-LABEL: test_frexp_f64_i32_only_use_exp:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexp@PLT
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
-; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; WIN32-LABEL: test_frexp_f64_i32_only_use_exp:
@@ -607,18 +583,18 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
}
; FIXME: Widen vector result
-; define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) {
+; define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) nounwind {
; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
; ret { <2 x double>, <2 x i32> } %result
; }
-; define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) {
+; define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) nounwind {
; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
; %result.0 = extractvalue { <2 x double>, <2 x i32> } %result, 0
; ret <2 x double> %result.0
; }
-; define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
+; define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind {
; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
; %result.1 = extractvalue { <2 x double>, <2 x i32> } %result, 1
; ret <2 x i32> %result.1
diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll
index fc1cc1f65627..e10e48f9aea0 100644
--- a/llvm/test/CodeGen/X86/mul128.ll
+++ b/llvm/test/CodeGen/X86/mul128.ll
@@ -18,85 +18,80 @@ define i128 @foo(i128 %t, i128 %u) {
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
-; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 28
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %edi
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: imull %ecx, %esi
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %ebp, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %esi, %eax
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: imull %edi, %eax
; X86-NEXT: addl %eax, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: imull %ebp, %esi
+; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull 28(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: mull %edi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: imull %edi, %esi
; X86-NEXT: addl %edx, %esi
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebx
; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: mull 44(%ebp)
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl $4
%k = mul i128 %t, %u
ret i128 %k
diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll
index 961205c50d97..724b2dc4c431 100644
--- a/llvm/test/CodeGen/X86/neg-abs.ll
+++ b/llvm/test/CodeGen/X86/neg-abs.ll
@@ -105,31 +105,35 @@ define i128 @neg_abs_i128(i128 %x) nounwind {
; X86-LABEL: neg_abs_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movl 32(%ebp), %edx
; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: xorl %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %ecx, %ebx
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: subl %ebx, %ebp
; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: subl %edi, %ebx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -259,37 +263,42 @@ define i64 @sub_abs_i64(i64 %x, i64 %y) nounwind {
define i128 @sub_abs_i128(i128 %x, i128 %y) nounwind {
; X86-LABEL: sub_abs_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 32(%ebp), %ecx
; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 28(%ebp), %esi
; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: xorl %edx, %edi
; X86-NEXT: subl %edx, %edi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 40(%ebp), %edx
; X86-NEXT: subl %edi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl 48(%ebp), %esi
; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: sub_abs_i128:
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll
index 672ebc1ec727..69ae1f19f320 100644
--- a/llvm/test/CodeGen/X86/pcsections-atomics.ll
+++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll
@@ -9,6 +9,7 @@
; RUN: llc -O1 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O1
; RUN: llc -O2 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O2
; RUN: llc -O3 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O3
+; RUN: llc -O3 -mcpu=haswell -mattr=cx16 < %s | FileCheck %s --check-prefixes=HASWELL-O3
target triple = "x86_64-unknown-linux-gnu"
@@ -50,6 +51,14 @@ define void @mixed_atomic_non_atomic(ptr %a) {
; O3-NEXT: movl $1, (%rdi)
; O3-NEXT: decl (%rdi)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: mixed_atomic_non_atomic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: incl (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection0:
+; HASWELL-O3-NEXT: movl $1, (%rdi)
+; HASWELL-O3-NEXT: decl (%rdi)
+; HASWELL-O3-NEXT: retq
entry:
; Accesses the same location atomically and non-atomically.
%0 = load volatile i32, ptr %a, align 4
@@ -107,6 +116,17 @@ define i64 @mixed_complex_atomic_non_atomic(ptr %a, ptr %b) {
; O3-NEXT: movq %rdx, (%rsi)
; O3-NEXT: addq %rcx, %rax
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: mixed_complex_atomic_non_atomic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movl $1, %eax
+; HASWELL-O3-NEXT: .Lpcsection1:
+; HASWELL-O3-NEXT: lock xaddq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq (%rsi), %rcx
+; HASWELL-O3-NEXT: leaq 1(%rcx), %rdx
+; HASWELL-O3-NEXT: movq %rdx, (%rsi)
+; HASWELL-O3-NEXT: addq %rcx, %rax
+; HASWELL-O3-NEXT: retq
entry:
%0 = atomicrmw add ptr %a, i64 1 monotonic, align 8, !pcsections !0
%1 = load i64, ptr %b, align 8
@@ -148,6 +168,14 @@ define i8 @atomic8_load_unordered(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection2:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a unordered, align 1, !pcsections !0
@@ -187,6 +215,14 @@ define i8 @atomic8_load_monotonic(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection3:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a monotonic, align 1, !pcsections !0
@@ -226,6 +262,14 @@ define i8 @atomic8_load_acquire(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection4:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a acquire, align 1, !pcsections !0
@@ -265,6 +309,14 @@ define i8 @atomic8_load_seq_cst(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection5:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a seq_cst, align 1, !pcsections !0
@@ -304,6 +356,14 @@ define void @atomic8_store_unordered(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection6:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a unordered, align 1, !pcsections !0
@@ -343,6 +403,14 @@ define void @atomic8_store_monotonic(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection7:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a monotonic, align 1, !pcsections !0
@@ -382,6 +450,14 @@ define void @atomic8_store_release(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection8:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a release, align 1, !pcsections !0
@@ -425,6 +501,15 @@ define void @atomic8_store_seq_cst(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection9:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a seq_cst, align 1, !pcsections !0
@@ -468,6 +553,15 @@ define void @atomic8_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection10:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -507,6 +601,14 @@ define void @atomic8_add_monotonic(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection11:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -546,6 +648,14 @@ define void @atomic8_sub_monotonic(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection12:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -585,6 +695,14 @@ define void @atomic8_and_monotonic(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection13:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -624,6 +742,14 @@ define void @atomic8_or_monotonic(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection14:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -663,6 +789,14 @@ define void @atomic8_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection15:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -763,6 +897,27 @@ define void @atomic8_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection16:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB16_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection17:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection18:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection19:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection20:
+; HASWELL-O3-NEXT: jne .LBB16_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -806,6 +961,15 @@ define void @atomic8_xchg_acquire(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection21:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -845,6 +1009,14 @@ define void @atomic8_add_acquire(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection22:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -884,6 +1056,14 @@ define void @atomic8_sub_acquire(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection23:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -923,6 +1103,14 @@ define void @atomic8_and_acquire(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection24:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -962,6 +1150,14 @@ define void @atomic8_or_acquire(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection25:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1001,6 +1197,14 @@ define void @atomic8_xor_acquire(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection26:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1101,6 +1305,27 @@ define void @atomic8_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection27:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB23_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection28:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection29:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection30:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection31:
+; HASWELL-O3-NEXT: jne .LBB23_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1144,6 +1369,15 @@ define void @atomic8_xchg_release(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection32:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1183,6 +1417,14 @@ define void @atomic8_add_release(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection33:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1222,6 +1464,14 @@ define void @atomic8_sub_release(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection34:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1261,6 +1511,14 @@ define void @atomic8_and_release(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection35:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1300,6 +1558,14 @@ define void @atomic8_or_release(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection36:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1339,6 +1605,14 @@ define void @atomic8_xor_release(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection37:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1439,6 +1713,27 @@ define void @atomic8_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection38:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB30_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection39:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection40:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection41:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection42:
+; HASWELL-O3-NEXT: jne .LBB30_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1482,6 +1777,15 @@ define void @atomic8_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection43:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1521,6 +1825,14 @@ define void @atomic8_add_acq_rel(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection44:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1560,6 +1872,14 @@ define void @atomic8_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection45:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1599,6 +1919,14 @@ define void @atomic8_and_acq_rel(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection46:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1638,6 +1966,14 @@ define void @atomic8_or_acq_rel(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection47:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1677,6 +2013,14 @@ define void @atomic8_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection48:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1777,6 +2121,27 @@ define void @atomic8_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection49:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB37_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection50:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection51:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection52:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection53:
+; HASWELL-O3-NEXT: jne .LBB37_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 acq_rel, align 1, !pcsections !0
@@ -1820,6 +2185,15 @@ define void @atomic8_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection54:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1859,6 +2233,14 @@ define void @atomic8_add_seq_cst(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection55:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1898,6 +2280,14 @@ define void @atomic8_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection56:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1937,6 +2327,14 @@ define void @atomic8_and_seq_cst(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection57:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -1976,6 +2374,14 @@ define void @atomic8_or_seq_cst(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection58:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2015,6 +2421,14 @@ define void @atomic8_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection59:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2115,6 +2529,27 @@ define void @atomic8_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection60:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB44_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection61:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection62:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection63:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection64:
+; HASWELL-O3-NEXT: jne .LBB44_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 seq_cst, align 1, !pcsections !0
@@ -2200,6 +2635,25 @@ define void @atomic8_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgb %cl, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $1, %cl
+; HASWELL-O3-NEXT: .Lpcsection65:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection66:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection67:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection68:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection69:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection70:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i8 42, i8 1 monotonic monotonic, align 1, !pcsections !0
@@ -2287,6 +2741,25 @@ define void @atomic8_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgb %cl, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $1, %cl
+; HASWELL-O3-NEXT: .Lpcsection71:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection72:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection73:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection74:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection75:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection76:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i8 42, i8 1 acquire monotonic, align 1, !pcsections !0
@@ -2374,6 +2847,25 @@ define void @atomic8_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgb %cl, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $1, %cl
+; HASWELL-O3-NEXT: .Lpcsection77:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection78:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection79:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection80:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection81:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection82:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i8 42, i8 1 release monotonic, align 1, !pcsections !0
@@ -2461,6 +2953,25 @@ define void @atomic8_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgb %cl, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $1, %cl
+; HASWELL-O3-NEXT: .Lpcsection83:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection84:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection85:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection86:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection87:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection88:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i8 42, i8 1 acq_rel monotonic, align 1, !pcsections !0
@@ -2548,6 +3059,25 @@ define void @atomic8_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgb %cl, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $1, %cl
+; HASWELL-O3-NEXT: .Lpcsection89:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection90:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection91:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection92:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection93:
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection94:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i8 42, i8 1 seq_cst monotonic, align 1, !pcsections !0
@@ -2589,6 +3119,14 @@ define i16 @atomic16_load_unordered(ptr %a) {
; O3-NEXT: movzwl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection95:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i16, ptr %a unordered, align 2, !pcsections !0
@@ -2628,6 +3166,14 @@ define i16 @atomic16_load_monotonic(ptr %a) {
; O3-NEXT: movzwl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection96:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i16, ptr %a monotonic, align 2, !pcsections !0
@@ -2667,6 +3213,14 @@ define i16 @atomic16_load_acquire(ptr %a) {
; O3-NEXT: movzwl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection97:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i16, ptr %a acquire, align 2, !pcsections !0
@@ -2706,6 +3260,14 @@ define i16 @atomic16_load_seq_cst(ptr %a) {
; O3-NEXT: movzwl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection98:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i16, ptr %a seq_cst, align 2, !pcsections !0
@@ -2745,6 +3307,14 @@ define void @atomic16_store_unordered(ptr %a) {
; O3-NEXT: movw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection99:
+; HASWELL-O3-NEXT: movw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i16 42, ptr %a unordered, align 2, !pcsections !0
@@ -2784,6 +3354,14 @@ define void @atomic16_store_monotonic(ptr %a) {
; O3-NEXT: movw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection100:
+; HASWELL-O3-NEXT: movw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i16 42, ptr %a monotonic, align 2, !pcsections !0
@@ -2823,6 +3401,14 @@ define void @atomic16_store_release(ptr %a) {
; O3-NEXT: movw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection101:
+; HASWELL-O3-NEXT: movw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i16 42, ptr %a release, align 2, !pcsections !0
@@ -2866,6 +3452,15 @@ define void @atomic16_store_seq_cst(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection102:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i16 42, ptr %a seq_cst, align 2, !pcsections !0
@@ -2909,6 +3504,15 @@ define void @atomic16_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection103:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -2948,6 +3552,14 @@ define void @atomic16_add_monotonic(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection104:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -2987,6 +3599,14 @@ define void @atomic16_sub_monotonic(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection105:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3026,6 +3646,14 @@ define void @atomic16_and_monotonic(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection106:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3065,6 +3693,14 @@ define void @atomic16_or_monotonic(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection107:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3104,6 +3740,14 @@ define void @atomic16_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection108:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3220,6 +3864,31 @@ define void @atomic16_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection109:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB64_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection110:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection111:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection112:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection113:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection114:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection115:
+; HASWELL-O3-NEXT: jne .LBB64_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3263,6 +3932,15 @@ define void @atomic16_xchg_acquire(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection116:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3302,6 +3980,14 @@ define void @atomic16_add_acquire(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection117:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3341,6 +4027,14 @@ define void @atomic16_sub_acquire(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection118:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3380,6 +4074,14 @@ define void @atomic16_and_acquire(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection119:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3419,6 +4121,14 @@ define void @atomic16_or_acquire(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection120:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3458,6 +4168,14 @@ define void @atomic16_xor_acquire(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection121:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3574,6 +4292,31 @@ define void @atomic16_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection122:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB71_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection123:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection124:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection125:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection126:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection127:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection128:
+; HASWELL-O3-NEXT: jne .LBB71_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3617,6 +4360,15 @@ define void @atomic16_xchg_release(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection129:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3656,6 +4408,14 @@ define void @atomic16_add_release(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection130:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3695,6 +4455,14 @@ define void @atomic16_sub_release(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection131:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3734,6 +4502,14 @@ define void @atomic16_and_release(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection132:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3773,6 +4549,14 @@ define void @atomic16_or_release(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection133:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3812,6 +4596,14 @@ define void @atomic16_xor_release(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection134:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3928,6 +4720,31 @@ define void @atomic16_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection135:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB78_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection136:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection137:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection138:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection139:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection140:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection141:
+; HASWELL-O3-NEXT: jne .LBB78_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3971,6 +4788,15 @@ define void @atomic16_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection142:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4010,6 +4836,14 @@ define void @atomic16_add_acq_rel(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection143:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4049,6 +4883,14 @@ define void @atomic16_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection144:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4088,6 +4930,14 @@ define void @atomic16_and_acq_rel(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection145:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4127,6 +4977,14 @@ define void @atomic16_or_acq_rel(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection146:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4166,6 +5024,14 @@ define void @atomic16_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection147:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4282,6 +5148,31 @@ define void @atomic16_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection148:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB85_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection149:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection150:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection151:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection152:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection153:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection154:
+; HASWELL-O3-NEXT: jne .LBB85_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4325,6 +5216,15 @@ define void @atomic16_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection155:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4364,6 +5264,14 @@ define void @atomic16_add_seq_cst(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection156:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4403,6 +5311,14 @@ define void @atomic16_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection157:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4442,6 +5358,14 @@ define void @atomic16_and_seq_cst(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection158:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4481,6 +5405,14 @@ define void @atomic16_or_seq_cst(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection159:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4520,6 +5452,14 @@ define void @atomic16_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection160:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4636,6 +5576,31 @@ define void @atomic16_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection161:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB92_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection162:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection163:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection164:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection165:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection166:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection167:
+; HASWELL-O3-NEXT: jne .LBB92_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4712,6 +5677,22 @@ define void @atomic16_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection168:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection169:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection170:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 monotonic monotonic, align 2, !pcsections !0
@@ -4790,6 +5771,22 @@ define void @atomic16_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection171:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection172:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection173:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 acquire monotonic, align 2, !pcsections !0
@@ -4868,6 +5865,22 @@ define void @atomic16_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection174:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection175:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection176:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 release monotonic, align 2, !pcsections !0
@@ -4946,6 +5959,22 @@ define void @atomic16_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection177:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection178:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection179:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 acq_rel monotonic, align 2, !pcsections !0
@@ -5024,6 +6053,22 @@ define void @atomic16_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection180:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection181:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection182:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 seq_cst monotonic, align 2, !pcsections !0
@@ -5065,6 +6110,14 @@ define i32 @atomic32_load_unordered(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection183:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a unordered, align 4, !pcsections !0
@@ -5104,6 +6157,14 @@ define i32 @atomic32_load_monotonic(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection184:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a monotonic, align 4, !pcsections !0
@@ -5143,6 +6204,14 @@ define i32 @atomic32_load_acquire(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection185:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a acquire, align 4, !pcsections !0
@@ -5182,6 +6251,14 @@ define i32 @atomic32_load_seq_cst(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection186:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a seq_cst, align 4, !pcsections !0
@@ -5221,6 +6298,14 @@ define void @atomic32_store_unordered(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection187:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a unordered, align 4, !pcsections !0
@@ -5260,6 +6345,14 @@ define void @atomic32_store_monotonic(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection188:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a monotonic, align 4, !pcsections !0
@@ -5299,6 +6392,14 @@ define void @atomic32_store_release(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection189:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a release, align 4, !pcsections !0
@@ -5342,6 +6443,15 @@ define void @atomic32_store_seq_cst(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection190:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a seq_cst, align 4, !pcsections !0
@@ -5385,6 +6495,15 @@ define void @atomic32_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection191:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5424,6 +6543,14 @@ define void @atomic32_add_monotonic(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection192:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5463,6 +6590,14 @@ define void @atomic32_sub_monotonic(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection193:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5502,6 +6637,14 @@ define void @atomic32_and_monotonic(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection194:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5541,6 +6684,14 @@ define void @atomic32_or_monotonic(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection195:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5580,6 +6731,14 @@ define void @atomic32_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection196:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5680,6 +6839,27 @@ define void @atomic32_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection197:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB112_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection198:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection199:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection200:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection201:
+; HASWELL-O3-NEXT: jne .LBB112_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5723,6 +6903,15 @@ define void @atomic32_xchg_acquire(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection202:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5762,6 +6951,14 @@ define void @atomic32_add_acquire(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection203:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5801,6 +6998,14 @@ define void @atomic32_sub_acquire(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection204:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5840,6 +7045,14 @@ define void @atomic32_and_acquire(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection205:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5879,6 +7092,14 @@ define void @atomic32_or_acquire(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection206:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5918,6 +7139,14 @@ define void @atomic32_xor_acquire(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection207:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6018,6 +7247,27 @@ define void @atomic32_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection208:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB119_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection209:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection210:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection211:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection212:
+; HASWELL-O3-NEXT: jne .LBB119_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6061,6 +7311,15 @@ define void @atomic32_xchg_release(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection213:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6100,6 +7359,14 @@ define void @atomic32_add_release(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection214:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6139,6 +7406,14 @@ define void @atomic32_sub_release(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection215:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6178,6 +7453,14 @@ define void @atomic32_and_release(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection216:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6217,6 +7500,14 @@ define void @atomic32_or_release(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection217:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6256,6 +7547,14 @@ define void @atomic32_xor_release(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection218:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6356,6 +7655,27 @@ define void @atomic32_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection219:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB126_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection220:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection221:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection222:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection223:
+; HASWELL-O3-NEXT: jne .LBB126_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6399,6 +7719,15 @@ define void @atomic32_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection224:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6438,6 +7767,14 @@ define void @atomic32_add_acq_rel(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection225:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6477,6 +7814,14 @@ define void @atomic32_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection226:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6516,6 +7861,14 @@ define void @atomic32_and_acq_rel(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection227:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6555,6 +7908,14 @@ define void @atomic32_or_acq_rel(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection228:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6594,6 +7955,14 @@ define void @atomic32_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection229:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6694,6 +8063,27 @@ define void @atomic32_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection230:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB133_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection231:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection232:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection233:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection234:
+; HASWELL-O3-NEXT: jne .LBB133_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6737,6 +8127,15 @@ define void @atomic32_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection235:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6776,6 +8175,14 @@ define void @atomic32_add_seq_cst(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection236:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6815,6 +8222,14 @@ define void @atomic32_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection237:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6854,6 +8269,14 @@ define void @atomic32_and_seq_cst(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection238:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6893,6 +8316,14 @@ define void @atomic32_or_seq_cst(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection239:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6932,6 +8363,14 @@ define void @atomic32_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection240:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7032,6 +8471,27 @@ define void @atomic32_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection241:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB140_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection242:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection243:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection244:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection245:
+; HASWELL-O3-NEXT: jne .LBB140_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7117,6 +8577,25 @@ define void @atomic32_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection246:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection247:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection248:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection249:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection250:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection251:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 monotonic monotonic, align 4, !pcsections !0
@@ -7204,6 +8683,25 @@ define void @atomic32_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection252:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection253:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection254:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection255:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection256:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection257:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 acquire monotonic, align 4, !pcsections !0
@@ -7291,6 +8789,25 @@ define void @atomic32_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection258:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection259:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection260:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection261:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection262:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection263:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 release monotonic, align 4, !pcsections !0
@@ -7378,6 +8895,25 @@ define void @atomic32_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection264:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection265:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection266:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection267:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection268:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection269:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 acq_rel monotonic, align 4, !pcsections !0
@@ -7465,6 +9001,25 @@ define void @atomic32_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection270:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection271:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection272:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection273:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection274:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection275:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 seq_cst monotonic, align 4, !pcsections !0
@@ -7506,6 +9061,14 @@ define i64 @atomic64_load_unordered(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection276:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a unordered, align 8, !pcsections !0
@@ -7545,6 +9108,14 @@ define i64 @atomic64_load_monotonic(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection277:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a monotonic, align 8, !pcsections !0
@@ -7584,6 +9155,14 @@ define i64 @atomic64_load_acquire(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection278:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a acquire, align 8, !pcsections !0
@@ -7623,6 +9202,14 @@ define i64 @atomic64_load_seq_cst(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection279:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a seq_cst, align 8, !pcsections !0
@@ -7662,6 +9249,14 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection280:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic ptr, ptr %a seq_cst, align 8, !pcsections !0
@@ -7701,6 +9296,14 @@ define void @atomic64_store_unordered(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection281:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a unordered, align 8, !pcsections !0
@@ -7740,6 +9343,14 @@ define void @atomic64_store_monotonic(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection282:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a monotonic, align 8, !pcsections !0
@@ -7779,6 +9390,14 @@ define void @atomic64_store_release(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection283:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a release, align 8, !pcsections !0
@@ -7822,6 +9441,15 @@ define void @atomic64_store_seq_cst(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection284:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a seq_cst, align 8, !pcsections !0
@@ -7861,6 +9489,14 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
; O3-NEXT: xchgq %rsi, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection285:
+; HASWELL-O3-NEXT: xchgq %rsi, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic ptr %v, ptr %a seq_cst, align 8, !pcsections !0
@@ -7904,6 +9540,15 @@ define void @atomic64_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection286:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7943,6 +9588,14 @@ define void @atomic64_add_monotonic(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection287:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7982,6 +9635,14 @@ define void @atomic64_sub_monotonic(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection288:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8021,6 +9682,14 @@ define void @atomic64_and_monotonic(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection289:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8060,6 +9729,14 @@ define void @atomic64_or_monotonic(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection290:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8099,6 +9776,14 @@ define void @atomic64_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection291:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8202,6 +9887,27 @@ define void @atomic64_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection292:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB162_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection293:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection294:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection295:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection296:
+; HASWELL-O3-NEXT: jne .LBB162_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8245,6 +9951,15 @@ define void @atomic64_xchg_acquire(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection297:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8284,6 +9999,14 @@ define void @atomic64_add_acquire(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection298:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8323,6 +10046,14 @@ define void @atomic64_sub_acquire(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection299:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8362,6 +10093,14 @@ define void @atomic64_and_acquire(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection300:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8401,6 +10140,14 @@ define void @atomic64_or_acquire(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection301:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8440,6 +10187,14 @@ define void @atomic64_xor_acquire(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection302:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8543,6 +10298,27 @@ define void @atomic64_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection303:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB169_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection304:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection305:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection306:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection307:
+; HASWELL-O3-NEXT: jne .LBB169_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8586,6 +10362,15 @@ define void @atomic64_xchg_release(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection308:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8625,6 +10410,14 @@ define void @atomic64_add_release(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection309:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8664,6 +10457,14 @@ define void @atomic64_sub_release(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection310:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8703,6 +10504,14 @@ define void @atomic64_and_release(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection311:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8742,6 +10551,14 @@ define void @atomic64_or_release(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection312:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8781,6 +10598,14 @@ define void @atomic64_xor_release(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection313:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8884,6 +10709,27 @@ define void @atomic64_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection314:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB176_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection315:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection316:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection317:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection318:
+; HASWELL-O3-NEXT: jne .LBB176_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8927,6 +10773,15 @@ define void @atomic64_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection319:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -8966,6 +10821,14 @@ define void @atomic64_add_acq_rel(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection320:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9005,6 +10868,14 @@ define void @atomic64_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection321:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9044,6 +10915,14 @@ define void @atomic64_and_acq_rel(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection322:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9083,6 +10962,14 @@ define void @atomic64_or_acq_rel(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection323:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9122,6 +11009,14 @@ define void @atomic64_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection324:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9225,6 +11120,27 @@ define void @atomic64_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection325:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB183_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection326:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection327:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection328:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection329:
+; HASWELL-O3-NEXT: jne .LBB183_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9268,6 +11184,15 @@ define void @atomic64_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection330:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9307,6 +11232,14 @@ define void @atomic64_add_seq_cst(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection331:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9346,6 +11279,14 @@ define void @atomic64_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection332:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9385,6 +11326,14 @@ define void @atomic64_and_seq_cst(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection333:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9424,6 +11373,14 @@ define void @atomic64_or_seq_cst(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection334:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9463,6 +11420,14 @@ define void @atomic64_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection335:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9566,6 +11531,27 @@ define void @atomic64_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection336:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB190_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection337:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection338:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection339:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection340:
+; HASWELL-O3-NEXT: jne .LBB190_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9651,6 +11637,25 @@ define void @atomic64_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgq %rcx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection341:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection342:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection343:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection344:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection345:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection346:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i64 42, i64 1 monotonic monotonic, align 8, !pcsections !0
@@ -9738,6 +11743,25 @@ define void @atomic64_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgq %rcx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection347:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection348:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection349:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection350:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection351:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection352:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i64 42, i64 1 acquire monotonic, align 8, !pcsections !0
@@ -9825,6 +11849,25 @@ define void @atomic64_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgq %rcx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection353:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection354:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection355:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection356:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection357:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection358:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i64 42, i64 1 release monotonic, align 8, !pcsections !0
@@ -9912,6 +11955,25 @@ define void @atomic64_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgq %rcx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection359:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection360:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection361:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection362:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection363:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection364:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i64 42, i64 1 acq_rel monotonic, align 8, !pcsections !0
@@ -9999,6 +12061,25 @@ define void @atomic64_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgq %rcx, (%rdi)
; O3-NEXT: movq $3, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection365:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection366:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection367:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection368:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection369:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection370:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: movq $3, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i64 42, i64 1 seq_cst monotonic, align 8, !pcsections !0
@@ -10044,6 +12125,15 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) {
; O3-NEXT: lock cmpxchgq %rdx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_cas_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq %rsi, %rax
+; HASWELL-O3-NEXT: movq foo(%rip), %rcx
+; HASWELL-O3-NEXT: .Lpcsection371:
+; HASWELL-O3-NEXT: lock cmpxchgq %rdx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, ptr %v1, ptr %v2 seq_cst seq_cst, align 8, !pcsections !0
@@ -10102,6 +12192,18 @@ define i64 @atomic_use_cond(ptr %a) {
; O3-NEXT: .LBB197_2: # %else
; O3-NEXT: movl $2, %eax
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic_use_cond:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: .Lpcsection372:
+; HASWELL-O3-NEXT: lock decq (%rdi)
+; HASWELL-O3-NEXT: jne .LBB197_2
+; HASWELL-O3-NEXT: # %bb.1: # %then
+; HASWELL-O3-NEXT: movl $1, %eax
+; HASWELL-O3-NEXT: retq
+; HASWELL-O3-NEXT: .LBB197_2: # %else
+; HASWELL-O3-NEXT: movl $2, %eax
+; HASWELL-O3-NEXT: retq
entry:
%x = atomicrmw sub ptr %a, i64 1 seq_cst, align 8, !pcsections !0
%y = icmp eq i64 %x, 1
@@ -10196,6 +12298,18 @@ define i128 @atomic128_load_unordered(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection373:
+; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT: .Lpcsection374:
+; HASWELL-O3-NEXT: vmovq %xmm0, %rax
+; HASWELL-O3-NEXT: .Lpcsection375:
+; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i128, ptr %a unordered, align 16, !pcsections !0
@@ -10285,6 +12399,18 @@ define i128 @atomic128_load_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection376:
+; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT: .Lpcsection377:
+; HASWELL-O3-NEXT: vmovq %xmm0, %rax
+; HASWELL-O3-NEXT: .Lpcsection378:
+; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i128, ptr %a monotonic, align 16, !pcsections !0
@@ -10374,6 +12500,18 @@ define i128 @atomic128_load_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection379:
+; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT: .Lpcsection380:
+; HASWELL-O3-NEXT: vmovq %xmm0, %rax
+; HASWELL-O3-NEXT: .Lpcsection381:
+; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i128, ptr %a acquire, align 16, !pcsections !0
@@ -10463,6 +12601,18 @@ define i128 @atomic128_load_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection382:
+; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0
+; HASWELL-O3-NEXT: .Lpcsection383:
+; HASWELL-O3-NEXT: vmovq %xmm0, %rax
+; HASWELL-O3-NEXT: .Lpcsection384:
+; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i128, ptr %a seq_cst, align 16, !pcsections !0
@@ -10502,6 +12652,14 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_load_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection385:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic ptr, ptr %a seq_cst, align 16, !pcsections !0
@@ -10629,6 +12787,16 @@ define void @atomic128_store_unordered(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection386:
+; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT: .Lpcsection387:
+; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i128 42, ptr %a unordered, align 16, !pcsections !0
@@ -10756,6 +12924,16 @@ define void @atomic128_store_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection388:
+; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT: .Lpcsection389:
+; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i128 42, ptr %a monotonic, align 16, !pcsections !0
@@ -10883,6 +13061,16 @@ define void @atomic128_store_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection390:
+; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT: .Lpcsection391:
+; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i128 42, ptr %a release, align 16, !pcsections !0
@@ -11010,6 +13198,18 @@ define void @atomic128_store_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection392:
+; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0]
+; HASWELL-O3-NEXT: .Lpcsection393:
+; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection394:
+; HASWELL-O3-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i128 42, ptr %a seq_cst, align 16, !pcsections !0
@@ -11049,6 +13249,14 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
; O3-NEXT: xchgq %rsi, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_store_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection395:
+; HASWELL-O3-NEXT: xchgq %rsi, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic ptr %v, ptr %a seq_cst, align 16, !pcsections !0
@@ -11176,6 +13384,33 @@ define void @atomic128_xchg_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection396:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection397:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection398:
+; HASWELL-O3-NEXT: movl $42, %ebx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB208_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: .Lpcsection399:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection400:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection401:
+; HASWELL-O3-NEXT: jne .LBB208_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11309,6 +13544,35 @@ define void @atomic128_add_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection402:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection403:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB209_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection404:
+; HASWELL-O3-NEXT: addq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection405:
+; HASWELL-O3-NEXT: adcq $0, %rcx
+; HASWELL-O3-NEXT: .Lpcsection406:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection407:
+; HASWELL-O3-NEXT: jne .LBB209_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11442,6 +13706,35 @@ define void @atomic128_sub_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection408:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection409:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB210_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection410:
+; HASWELL-O3-NEXT: addq $-42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection411:
+; HASWELL-O3-NEXT: adcq $-1, %rcx
+; HASWELL-O3-NEXT: .Lpcsection412:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection413:
+; HASWELL-O3-NEXT: jne .LBB210_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11574,6 +13867,34 @@ define void @atomic128_and_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection414:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection415:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB211_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection416:
+; HASWELL-O3-NEXT: andl $42, %ebx
+; HASWELL-O3-NEXT: .Lpcsection417:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection418:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection419:
+; HASWELL-O3-NEXT: jne .LBB211_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11699,6 +14020,33 @@ define void @atomic128_or_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection420:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection421:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB212_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection422:
+; HASWELL-O3-NEXT: orq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection423:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection424:
+; HASWELL-O3-NEXT: jne .LBB212_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11824,6 +14172,33 @@ define void @atomic128_xor_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection425:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection426:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB213_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection427:
+; HASWELL-O3-NEXT: xorq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection428:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection429:
+; HASWELL-O3-NEXT: jne .LBB213_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -11964,6 +14339,36 @@ define void @atomic128_nand_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection430:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection431:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection432:
+; HASWELL-O3-NEXT: movq $-1, %rcx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB214_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection433:
+; HASWELL-O3-NEXT: notl %ebx
+; HASWELL-O3-NEXT: .Lpcsection434:
+; HASWELL-O3-NEXT: orq $-43, %rbx
+; HASWELL-O3-NEXT: .Lpcsection435:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection436:
+; HASWELL-O3-NEXT: jne .LBB214_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i128 42 monotonic, align 16, !pcsections !0
@@ -12091,6 +14496,33 @@ define void @atomic128_xchg_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection437:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection438:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection439:
+; HASWELL-O3-NEXT: movl $42, %ebx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB215_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: .Lpcsection440:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection441:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection442:
+; HASWELL-O3-NEXT: jne .LBB215_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12224,6 +14656,35 @@ define void @atomic128_add_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection443:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection444:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB216_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection445:
+; HASWELL-O3-NEXT: addq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection446:
+; HASWELL-O3-NEXT: adcq $0, %rcx
+; HASWELL-O3-NEXT: .Lpcsection447:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection448:
+; HASWELL-O3-NEXT: jne .LBB216_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12357,6 +14818,35 @@ define void @atomic128_sub_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection449:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection450:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB217_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection451:
+; HASWELL-O3-NEXT: addq $-42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection452:
+; HASWELL-O3-NEXT: adcq $-1, %rcx
+; HASWELL-O3-NEXT: .Lpcsection453:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection454:
+; HASWELL-O3-NEXT: jne .LBB217_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12489,6 +14979,34 @@ define void @atomic128_and_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection455:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection456:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB218_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection457:
+; HASWELL-O3-NEXT: andl $42, %ebx
+; HASWELL-O3-NEXT: .Lpcsection458:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection459:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection460:
+; HASWELL-O3-NEXT: jne .LBB218_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12614,6 +15132,33 @@ define void @atomic128_or_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection461:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection462:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB219_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection463:
+; HASWELL-O3-NEXT: orq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection464:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection465:
+; HASWELL-O3-NEXT: jne .LBB219_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12739,6 +15284,33 @@ define void @atomic128_xor_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection466:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection467:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB220_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection468:
+; HASWELL-O3-NEXT: xorq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection469:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection470:
+; HASWELL-O3-NEXT: jne .LBB220_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -12879,6 +15451,36 @@ define void @atomic128_nand_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection471:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection472:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection473:
+; HASWELL-O3-NEXT: movq $-1, %rcx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB221_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection474:
+; HASWELL-O3-NEXT: notl %ebx
+; HASWELL-O3-NEXT: .Lpcsection475:
+; HASWELL-O3-NEXT: orq $-43, %rbx
+; HASWELL-O3-NEXT: .Lpcsection476:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection477:
+; HASWELL-O3-NEXT: jne .LBB221_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i128 42 acquire, align 16, !pcsections !0
@@ -13006,6 +15608,33 @@ define void @atomic128_xchg_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_release:
+; HASWELL-O3: # %bb.0:
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection478:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection479:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection480:
+; HASWELL-O3-NEXT: movl $42, %ebx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB222_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: .Lpcsection481:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection482:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection483:
+; HASWELL-O3-NEXT: jne .LBB222_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i128 42 release, align 16, !pcsections !0
store volatile i64 1, ptr @foo, align 8
@@ -13138,6 +15767,35 @@ define void @atomic128_add_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection484:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection485:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB223_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection486:
+; HASWELL-O3-NEXT: addq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection487:
+; HASWELL-O3-NEXT: adcq $0, %rcx
+; HASWELL-O3-NEXT: .Lpcsection488:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection489:
+; HASWELL-O3-NEXT: jne .LBB223_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13271,6 +15929,35 @@ define void @atomic128_sub_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection490:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection491:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB224_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection492:
+; HASWELL-O3-NEXT: addq $-42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection493:
+; HASWELL-O3-NEXT: adcq $-1, %rcx
+; HASWELL-O3-NEXT: .Lpcsection494:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection495:
+; HASWELL-O3-NEXT: jne .LBB224_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13403,6 +16090,34 @@ define void @atomic128_and_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection496:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection497:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB225_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection498:
+; HASWELL-O3-NEXT: andl $42, %ebx
+; HASWELL-O3-NEXT: .Lpcsection499:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection500:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection501:
+; HASWELL-O3-NEXT: jne .LBB225_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13528,6 +16243,33 @@ define void @atomic128_or_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection502:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection503:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB226_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection504:
+; HASWELL-O3-NEXT: orq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection505:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection506:
+; HASWELL-O3-NEXT: jne .LBB226_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13653,6 +16395,33 @@ define void @atomic128_xor_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection507:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection508:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB227_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection509:
+; HASWELL-O3-NEXT: xorq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection510:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection511:
+; HASWELL-O3-NEXT: jne .LBB227_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13793,6 +16562,36 @@ define void @atomic128_nand_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection512:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection513:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection514:
+; HASWELL-O3-NEXT: movq $-1, %rcx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB228_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection515:
+; HASWELL-O3-NEXT: notl %ebx
+; HASWELL-O3-NEXT: .Lpcsection516:
+; HASWELL-O3-NEXT: orq $-43, %rbx
+; HASWELL-O3-NEXT: .Lpcsection517:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection518:
+; HASWELL-O3-NEXT: jne .LBB228_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i128 42 release, align 16, !pcsections !0
@@ -13920,6 +16719,33 @@ define void @atomic128_xchg_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection519:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection520:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection521:
+; HASWELL-O3-NEXT: movl $42, %ebx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB229_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: .Lpcsection522:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection523:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection524:
+; HASWELL-O3-NEXT: jne .LBB229_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14053,6 +16879,35 @@ define void @atomic128_add_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection525:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection526:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB230_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection527:
+; HASWELL-O3-NEXT: addq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection528:
+; HASWELL-O3-NEXT: adcq $0, %rcx
+; HASWELL-O3-NEXT: .Lpcsection529:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection530:
+; HASWELL-O3-NEXT: jne .LBB230_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14186,6 +17041,35 @@ define void @atomic128_sub_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection531:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection532:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB231_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection533:
+; HASWELL-O3-NEXT: addq $-42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection534:
+; HASWELL-O3-NEXT: adcq $-1, %rcx
+; HASWELL-O3-NEXT: .Lpcsection535:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection536:
+; HASWELL-O3-NEXT: jne .LBB231_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14318,6 +17202,34 @@ define void @atomic128_and_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection537:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection538:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB232_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection539:
+; HASWELL-O3-NEXT: andl $42, %ebx
+; HASWELL-O3-NEXT: .Lpcsection540:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection541:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection542:
+; HASWELL-O3-NEXT: jne .LBB232_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14443,6 +17355,33 @@ define void @atomic128_or_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection543:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection544:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB233_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection545:
+; HASWELL-O3-NEXT: orq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection546:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection547:
+; HASWELL-O3-NEXT: jne .LBB233_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14568,6 +17507,33 @@ define void @atomic128_xor_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection548:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection549:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB234_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection550:
+; HASWELL-O3-NEXT: xorq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection551:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection552:
+; HASWELL-O3-NEXT: jne .LBB234_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14708,6 +17674,36 @@ define void @atomic128_nand_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection553:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection554:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection555:
+; HASWELL-O3-NEXT: movq $-1, %rcx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB235_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection556:
+; HASWELL-O3-NEXT: notl %ebx
+; HASWELL-O3-NEXT: .Lpcsection557:
+; HASWELL-O3-NEXT: orq $-43, %rbx
+; HASWELL-O3-NEXT: .Lpcsection558:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection559:
+; HASWELL-O3-NEXT: jne .LBB235_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i128 42 acq_rel, align 16, !pcsections !0
@@ -14835,6 +17831,33 @@ define void @atomic128_xchg_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection560:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection561:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection562:
+; HASWELL-O3-NEXT: movl $42, %ebx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB236_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: .Lpcsection563:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection564:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection565:
+; HASWELL-O3-NEXT: jne .LBB236_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -14968,6 +17991,35 @@ define void @atomic128_add_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection566:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection567:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB237_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection568:
+; HASWELL-O3-NEXT: addq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection569:
+; HASWELL-O3-NEXT: adcq $0, %rcx
+; HASWELL-O3-NEXT: .Lpcsection570:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection571:
+; HASWELL-O3-NEXT: jne .LBB237_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15101,6 +18153,35 @@ define void @atomic128_sub_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection572:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection573:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB238_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection574:
+; HASWELL-O3-NEXT: addq $-42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection575:
+; HASWELL-O3-NEXT: adcq $-1, %rcx
+; HASWELL-O3-NEXT: .Lpcsection576:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection577:
+; HASWELL-O3-NEXT: jne .LBB238_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15233,6 +18314,34 @@ define void @atomic128_and_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection578:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection579:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB239_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection580:
+; HASWELL-O3-NEXT: andl $42, %ebx
+; HASWELL-O3-NEXT: .Lpcsection581:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection582:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection583:
+; HASWELL-O3-NEXT: jne .LBB239_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15358,6 +18467,33 @@ define void @atomic128_or_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection584:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection585:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB240_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection586:
+; HASWELL-O3-NEXT: orq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection587:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection588:
+; HASWELL-O3-NEXT: jne .LBB240_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15483,6 +18619,33 @@ define void @atomic128_xor_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection589:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection590:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB241_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movq %rax, %rbx
+; HASWELL-O3-NEXT: .Lpcsection591:
+; HASWELL-O3-NEXT: xorq $42, %rbx
+; HASWELL-O3-NEXT: movq %rdx, %rcx
+; HASWELL-O3-NEXT: .Lpcsection592:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection593:
+; HASWELL-O3-NEXT: jne .LBB241_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15623,6 +18786,36 @@ define void @atomic128_nand_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection594:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .Lpcsection595:
+; HASWELL-O3-NEXT: movq 8(%rdi), %rdx
+; HASWELL-O3-NEXT: .Lpcsection596:
+; HASWELL-O3-NEXT: movq $-1, %rcx
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB242_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ebx
+; HASWELL-O3-NEXT: .Lpcsection597:
+; HASWELL-O3-NEXT: notl %ebx
+; HASWELL-O3-NEXT: .Lpcsection598:
+; HASWELL-O3-NEXT: orq $-43, %rbx
+; HASWELL-O3-NEXT: .Lpcsection599:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection600:
+; HASWELL-O3-NEXT: jne .LBB242_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i128 42 seq_cst, align 16, !pcsections !0
@@ -15781,6 +18974,43 @@ define void @atomic128_cas_monotonic(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection601:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection602:
+; HASWELL-O3-NEXT: movl $1, %ebx
+; HASWELL-O3-NEXT: .Lpcsection603:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection604:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection605:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection606:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection607:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection608:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection609:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection610:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection611:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection612:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection613:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i128 42, i128 1 monotonic monotonic, align 16, !pcsections !0
@@ -15941,6 +19171,43 @@ define void @atomic128_cas_acquire(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection614:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection615:
+; HASWELL-O3-NEXT: movl $1, %ebx
+; HASWELL-O3-NEXT: .Lpcsection616:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection617:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection618:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection619:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection620:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection621:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection622:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection623:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection624:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection625:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection626:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i128 42, i128 1 acquire monotonic, align 16, !pcsections !0
@@ -16101,6 +19368,43 @@ define void @atomic128_cas_release(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection627:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection628:
+; HASWELL-O3-NEXT: movl $1, %ebx
+; HASWELL-O3-NEXT: .Lpcsection629:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection630:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection631:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection632:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection633:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection634:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection635:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection636:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection637:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection638:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection639:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i128 42, i128 1 release monotonic, align 16, !pcsections !0
@@ -16261,6 +19565,43 @@ define void @atomic128_cas_acq_rel(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection640:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection641:
+; HASWELL-O3-NEXT: movl $1, %ebx
+; HASWELL-O3-NEXT: .Lpcsection642:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection643:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection644:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection645:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection646:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection647:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection648:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection649:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection650:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection651:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection652:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i128 42, i128 1 acq_rel monotonic, align 16, !pcsections !0
@@ -16421,6 +19762,43 @@ define void @atomic128_cas_seq_cst(ptr %a) {
; O3-NEXT: popq %rbx
; O3-NEXT: .cfi_def_cfa_offset 8
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic128_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: pushq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16
+; HASWELL-O3-NEXT: .cfi_offset %rbx, -16
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection653:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection654:
+; HASWELL-O3-NEXT: movl $1, %ebx
+; HASWELL-O3-NEXT: .Lpcsection655:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection656:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection657:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection658:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection659:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection660:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection661:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection662:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection663:
+; HASWELL-O3-NEXT: xorl %edx, %edx
+; HASWELL-O3-NEXT: .Lpcsection664:
+; HASWELL-O3-NEXT: xorl %ecx, %ecx
+; HASWELL-O3-NEXT: .Lpcsection665:
+; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: popq %rbx
+; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i128 42, i128 1 seq_cst monotonic, align 16, !pcsections !0
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 35c7c0e09f39..3004b8b72fcc 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -340,84 +340,87 @@ define i64 @cnt64(i64 %x) nounwind readnone {
define i128 @cnt128(i128 %x) nounwind readnone {
; X86-NOSSE-LABEL: cnt128:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %edi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
+; X86-NOSSE-NEXT: movl 32(%ebp), %ecx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %esi
; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %esi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %esi
-; X86-NOSSE-NEXT: addl %edi, %esi
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %edx, %edi
-; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %ecx
; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: addl %esi, %ecx
; X86-NOSSE-NEXT: movl %ecx, %edi
; X86-NOSSE-NEXT: shrl $4, %edi
; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl 28(%ebp), %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
; X86-NOSSE-NEXT: addl %edx, %ecx
-; X86-NOSSE-NEXT: addl %esi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl $4, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: addl %ecx, %edx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: movl %edx, (%eax)
; X86-NOSSE-NEXT: movl $0, 12(%eax)
; X86-NOSSE-NEXT: movl $0, 8(%eax)
; X86-NOSSE-NEXT: movl $0, 4(%eax)
+; X86-NOSSE-NEXT: leal -8(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl $4
;
; X64-BASE-LABEL: cnt128:
@@ -462,20 +465,26 @@ define i128 @cnt128(i128 %x) nounwind readnone {
;
; X86-POPCNT-LABEL: cnt128:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: movl %esi, (%eax)
; X86-POPCNT-NEXT: movl $0, 12(%eax)
; X86-POPCNT-NEXT: movl $0, 8(%eax)
; X86-POPCNT-NEXT: movl $0, 4(%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128:
@@ -522,7 +531,11 @@ define i128 @cnt128(i128 %x) nounwind readnone {
;
; X86-SSE2-LABEL: cnt128:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -564,11 +577,17 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-SSE2-NEXT: movl $0, 12(%eax)
; X86-SSE2-NEXT: movl $0, 8(%eax)
; X86-SSE2-NEXT: movl $0, 4(%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -600,6 +619,8 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-SSSE3-NEXT: movl $0, 12(%eax)
; X86-SSSE3-NEXT: movl $0, 8(%eax)
; X86-SSSE3-NEXT: movl $0, 4(%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
@@ -928,87 +949,92 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-NOSSE-LABEL: cnt128_optsize:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ecx
-; X86-NOSSE-NEXT: shrl %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %edi, %ecx
-; X86-NOSSE-NEXT: subl %ecx, %ebx
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: subl $16, %esp
+; X86-NOSSE-NEXT: movl 32(%ebp), %edx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %eax, %esi
; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: movl %esi, %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: addl %edi, %esi
+; X86-NOSSE-NEXT: movl %esi, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %esi, %edi
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: movl $1431655765, %eax # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %eax, %esi
+; X86-NOSSE-NEXT: subl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: shrl $2, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %edx, %ebx
+; X86-NOSSE-NEXT: movl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %edx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: andl %edx, %ebx
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %edi, %edx
+; X86-NOSSE-NEXT: movl 28(%ebp), %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl %eax, %edi
+; X86-NOSSE-NEXT: subl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
; X86-NOSSE-NEXT: shrl $2, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: addl %ebp, %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %ebx, %ebp
+; X86-NOSSE-NEXT: addl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ebx, %edi
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl %edi, %ebx
+; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %esi, %ebx
; X86-NOSSE-NEXT: subl %ebx, %eax
; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
; X86-NOSSE-NEXT: shrl $2, %eax
; X86-NOSSE-NEXT: andl %ecx, %eax
; X86-NOSSE-NEXT: addl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: movl %eax, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %eax, %edi
+; X86-NOSSE-NEXT: andl %eax, %ecx
+; X86-NOSSE-NEXT: imull $16843009, %edi, %eax # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %eax
-; X86-NOSSE-NEXT: andl %ebx, %edi
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %ebp, %eax
-; X86-NOSSE-NEXT: subl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: shrl $2, %esi
-; X86-NOSSE-NEXT: andl %ecx, %esi
-; X86-NOSSE-NEXT: addl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %esi, %ebp
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %esi, %eax
-; X86-NOSSE-NEXT: subl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl %ecx, %edx
-; X86-NOSSE-NEXT: addl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl $4, %eax
-; X86-NOSSE-NEXT: addl %edx, %eax
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ebx, %eax
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
-; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: addl %ecx, %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
-; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: leal -12(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
; X86-NOSSE-NEXT: popl %ebx
@@ -1057,13 +1083,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
;
; X86-POPCNT-LABEL: cnt128_optsize:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: xorl %ecx, %ecx
@@ -1071,7 +1101,9 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_optsize:
@@ -1118,7 +1150,11 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
;
; X86-SSE2-LABEL: cnt128_optsize:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -1161,11 +1197,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-SSE2-NEXT: movl %ecx, 8(%eax)
; X86-SSE2-NEXT: movl %ecx, 4(%eax)
; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128_optsize:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -1198,6 +1240,8 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
@@ -1415,85 +1459,88 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-NOSSE-LABEL: cnt128_pgso:
; X86-NOSSE: # %bb.0:
-; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %edi
-; X86-NOSSE-NEXT: movl %edi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %edi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: andl $-16, %esp
+; X86-NOSSE-NEXT: movl 24(%ebp), %eax
+; X86-NOSSE-NEXT: movl 32(%ebp), %ecx
+; X86-NOSSE-NEXT: movl 36(%ebp), %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %esi
; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %ebx, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebx
-; X86-NOSSE-NEXT: shrl $4, %ebx
-; X86-NOSSE-NEXT: addl %esi, %ebx
-; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %esi
-; X86-NOSSE-NEXT: addl %edi, %esi
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: movl %edx, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %edx, %edi
-; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %ecx
; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: addl %esi, %ecx
; X86-NOSSE-NEXT: movl %ecx, %edi
; X86-NOSSE-NEXT: shrl $4, %edi
; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl 28(%ebp), %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
; X86-NOSSE-NEXT: addl %edx, %ecx
-; X86-NOSSE-NEXT: addl %esi, %ecx
-; X86-NOSSE-NEXT: xorl %edx, %edx
-; X86-NOSSE-NEXT: movl %edx, 12(%eax)
-; X86-NOSSE-NEXT: movl %edx, 8(%eax)
-; X86-NOSSE-NEXT: movl %edx, 4(%eax)
-; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %esi
+; X86-NOSSE-NEXT: movl %esi, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: shrl $4, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: movl 8(%ebp), %eax
+; X86-NOSSE-NEXT: addl %ecx, %edx
+; X86-NOSSE-NEXT: xorl %ecx, %ecx
+; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
+; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
+; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
+; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: leal -8(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl $4
;
; X64-BASE-LABEL: cnt128_pgso:
@@ -1538,13 +1585,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
;
; X86-POPCNT-LABEL: cnt128_pgso:
; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %ebp
+; X86-POPCNT-NEXT: movl %esp, %ebp
; X86-POPCNT-NEXT: pushl %esi
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: andl $-16, %esp
+; X86-POPCNT-NEXT: subl $16, %esp
+; X86-POPCNT-NEXT: movl 8(%ebp), %eax
+; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx
+; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi
; X86-POPCNT-NEXT: addl %ecx, %esi
; X86-POPCNT-NEXT: addl %edx, %esi
; X86-POPCNT-NEXT: xorl %ecx, %ecx
@@ -1552,7 +1603,9 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: leal -4(%ebp), %esp
; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: popl %ebp
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_pgso:
@@ -1599,7 +1652,11 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
;
; X86-SSE2-LABEL: cnt128_pgso:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
@@ -1642,11 +1699,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-SSE2-NEXT: movl %ecx, 8(%eax)
; X86-SSE2-NEXT: movl %ecx, 4(%eax)
; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl $4
;
; X86-SSSE3-LABEL: cnt128_pgso:
; X86-SSSE3: # %bb.0:
-; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: pushl %ebp
+; X86-SSSE3-NEXT: movl %esp, %ebp
+; X86-SSSE3-NEXT: andl $-16, %esp
+; X86-SSSE3-NEXT: subl $16, %esp
+; X86-SSSE3-NEXT: movl 8(%ebp), %eax
; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
@@ -1679,6 +1742,8 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: movl %ebp, %esp
+; X86-SSSE3-NEXT: popl %ebp
; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
diff --git a/llvm/test/CodeGen/X86/pr46004.ll b/llvm/test/CodeGen/X86/pr46004.ll
index f7c7da089c36..829d6dfceba3 100644
--- a/llvm/test/CodeGen/X86/pr46004.ll
+++ b/llvm/test/CodeGen/X86/pr46004.ll
@@ -6,7 +6,17 @@
define void @fuzz22357(i128 %a0) {
; X86-LABEL: fuzz22357:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
; X86-NEXT: movb $0, (%eax)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: fuzz22357:
@@ -24,6 +34,15 @@ define void @fuzz22357(i128 %a0) {
define void @fuzz22723(i128 %a0) {
; X86-LABEL: fuzz22723:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: fuzz22723:
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
index 50a967e1c2a1..ce9723b3a84b 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -762,11 +762,15 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind {
define i32 @t_to_u32(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_u32:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixunstfsi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_u32:
@@ -797,12 +801,18 @@ define i32 @t_to_u32(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_u32:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixunstfsi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_u32:
@@ -835,12 +845,18 @@ define i32 @t_to_u32(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_u32:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixunstfsi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_u32:
@@ -860,11 +876,15 @@ define i32 @t_to_u32(fp128 %a) nounwind {
define i32 @t_to_s32(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_s32:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixtfsi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_s32:
@@ -895,12 +915,18 @@ define i32 @t_to_s32(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_s32:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixtfsi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_s32:
@@ -933,12 +959,18 @@ define i32 @t_to_s32(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_s32:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixtfsi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_s32:
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index f516db8b30ff..3287869f2c60 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1417,11 +1417,15 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
define i64 @t_to_u64(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_u64:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixunstfdi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_u64:
@@ -1452,12 +1456,18 @@ define i64 @t_to_u64(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_u64:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixunstfdi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_u64:
@@ -1490,12 +1500,18 @@ define i64 @t_to_u64(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_u64:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixunstfdi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_u64:
@@ -1515,11 +1531,15 @@ define i64 @t_to_u64(fp128 %a) nounwind {
define i64 @t_to_s64(fp128 %a) nounwind {
; X86-AVX512-WIN-LABEL: t_to_s64:
; X86-AVX512-WIN: # %bb.0:
-; X86-AVX512-WIN-NEXT: subl $16, %esp
-; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX512-WIN-NEXT: pushl %ebp
+; X86-AVX512-WIN-NEXT: movl %esp, %ebp
+; X86-AVX512-WIN-NEXT: andl $-16, %esp
+; X86-AVX512-WIN-NEXT: subl $32, %esp
+; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0
; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp)
; X86-AVX512-WIN-NEXT: calll ___fixtfdi
-; X86-AVX512-WIN-NEXT: addl $16, %esp
+; X86-AVX512-WIN-NEXT: movl %ebp, %esp
+; X86-AVX512-WIN-NEXT: popl %ebp
; X86-AVX512-WIN-NEXT: retl
;
; X86-AVX512-LIN-LABEL: t_to_s64:
@@ -1550,12 +1570,18 @@ define i64 @t_to_s64(fp128 %a) nounwind {
;
; X86-SSE-WIN-LABEL: t_to_s64:
; X86-SSE-WIN: # %bb.0:
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE-WIN-NEXT: pushl %ebp
+; X86-SSE-WIN-NEXT: movl %esp, %ebp
+; X86-SSE-WIN-NEXT: andl $-16, %esp
+; X86-SSE-WIN-NEXT: subl $16, %esp
+; X86-SSE-WIN-NEXT: pushl 20(%ebp)
+; X86-SSE-WIN-NEXT: pushl 16(%ebp)
+; X86-SSE-WIN-NEXT: pushl 12(%ebp)
+; X86-SSE-WIN-NEXT: pushl 8(%ebp)
; X86-SSE-WIN-NEXT: calll ___fixtfdi
; X86-SSE-WIN-NEXT: addl $16, %esp
+; X86-SSE-WIN-NEXT: movl %ebp, %esp
+; X86-SSE-WIN-NEXT: popl %ebp
; X86-SSE-WIN-NEXT: retl
;
; X86-SSE-LIN-LABEL: t_to_s64:
@@ -1588,12 +1614,18 @@ define i64 @t_to_s64(fp128 %a) nounwind {
;
; X87-WIN-LABEL: t_to_s64:
; X87-WIN: # %bb.0:
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT: pushl %ebp
+; X87-WIN-NEXT: movl %esp, %ebp
+; X87-WIN-NEXT: andl $-16, %esp
+; X87-WIN-NEXT: subl $16, %esp
+; X87-WIN-NEXT: pushl 20(%ebp)
+; X87-WIN-NEXT: pushl 16(%ebp)
+; X87-WIN-NEXT: pushl 12(%ebp)
+; X87-WIN-NEXT: pushl 8(%ebp)
; X87-WIN-NEXT: calll ___fixtfdi
; X87-WIN-NEXT: addl $16, %esp
+; X87-WIN-NEXT: movl %ebp, %esp
+; X87-WIN-NEXT: popl %ebp
; X87-WIN-NEXT: retl
;
; X87-LIN-LABEL: t_to_s64:
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 874913629e9e..8a287229a1cb 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -118,30 +118,33 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X86-LABEL: scmp.8.128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: sbbl %eax, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: cmpl %ecx, 8(%ebp)
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %ebx
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: cmpl 8(%ebp), %esi
+; X86-NEXT: sbbl 12(%ebp), %eax
+; X86-NEXT: sbbl 16(%ebp), %edi
+; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: setl %al
; X86-NEXT: subb %cl, %al
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 4925f8bc6c8b..392bc83d9d5d 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -307,69 +307,70 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $80, %esp
+; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shldl $31, %eax, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: shldl $31, %edi, %esi
+; X86-NEXT: shldl $31, %ecx, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: sets %al
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: sets %cl
-; X86-NEXT: xorb %al, %cl
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 20(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: sets %al
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %bl
+; X86-NEXT: xorb %al, %bl
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
-; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: testb %bl, %al
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %edx
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index e7727a0ab617..7df490f98492 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -370,67 +370,68 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $96, %esp
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %esi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %eax, %edi
-; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 20(%ebp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $31, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: shldl $31, %edi, %ebx
+; X86-NEXT: shldl $31, %esi, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 20(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %esi
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %edi
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl $0, %ebx
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: sets %dl
-; X86-NEXT: xorb %al, %dl
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl %eax
+; X86-NEXT: sets %al
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -438,41 +439,38 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
; X86-NEXT: sbbl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: sbbl $0, %ecx
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovgel %ecx, %edi
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: cmovgel %ecx, %ebx
+; X86-NEXT: cmovgel %ecx, %eax
; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovgel %ecx, %esi
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: sbbl %ebx, %edi
-; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: cmovgel %ecx, %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovgel %eax, %esi
+; X86-NEXT: cmovgel %eax, %edi
; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: cmovgel %eax, %edx
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -805,137 +803,155 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $208, %esp
-; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: subl $240, %esp
+; X86-NEXT: movl 12(%ebp), %esi
+; X86-NEXT: movl 20(%ebp), %edi
; X86-NEXT: movl 16(%ebp), %ebx
-; X86-NEXT: movl 32(%ebp), %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: leal (%ebx,%ebx), %eax
; X86-NEXT: shrl $31, %ebx
; X86-NEXT: shldl $31, %eax, %ebx
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 36(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: leal (%ecx,%ecx), %edx
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %edx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%edi,%edi), %eax
+; X86-NEXT: shrl $31, %edi
+; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl 32(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 36(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: sarl $31, %edi
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%esi,%esi), %eax
+; X86-NEXT: shrl $31, %esi
+; X86-NEXT: shldl $31, %eax, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl 40(%ebp), %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl 24(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: leal (%ecx,%ecx), %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: leal (%ecx,%ecx), %eax
; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: shldl $31, %edx, %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl 40(%ebp)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl 28(%ebp)
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __divti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
@@ -949,18 +965,18 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: sets %bl
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: sets %bh
-; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
; X86-NEXT: orl %edi, %esi
-; X86-NEXT: setne %bl
-; X86-NEXT: testb %bh, %bl
+; X86-NEXT: setne %bh
+; X86-NEXT: testb %bl, %bh
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
@@ -1107,36 +1123,24 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: sets %ah
-; X86-NEXT: xorb %al, %ah
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl 40(%ebp)
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %eax
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: calll __modti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -1144,38 +1148,38 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovgel %eax, %esi
-; X86-NEXT: cmovgel %eax, %ecx
; X86-NEXT: cmovgel %eax, %edi
+; X86-NEXT: cmovgel %eax, %ecx
+; X86-NEXT: cmovgel %eax, %esi
; X86-NEXT: movl $-1, %edx
; X86-NEXT: cmovgel %edx, %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl $-1, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl $0, %eax
; X86-NEXT: cmovgel %eax, %ebx
-; X86-NEXT: cmovgel %edx, %edi
-; X86-NEXT: shldl $31, %ebx, %edi
+; X86-NEXT: cmovgel %edx, %esi
+; X86-NEXT: shldl $31, %ebx, %esi
; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 76cb4e87bae1..dfeef48897e0 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -792,14 +792,24 @@ define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32
define void @combineShiftOfShiftedLogic(i128 %a1, i32 %a2, ptr %p) {
; X86-LABEL: combineShiftOfShiftedLogic:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: movl %eax, 20(%ecx)
; X86-NEXT: movl $0, 16(%ecx)
; X86-NEXT: movl $0, 12(%ecx)
; X86-NEXT: movl $0, 8(%ecx)
; X86-NEXT: movl $0, 4(%ecx)
; X86-NEXT: movl $0, (%ecx)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: combineShiftOfShiftedLogic:
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 767bd772ab7a..9323cd5b1917 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -212,9 +212,18 @@ entry:
}
define void @test_lshr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_lshr_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_lshr_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_lshr_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = lshr i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -222,9 +231,18 @@ entry:
}
define void @test_ashr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_ashr_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_ashr_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_ashr_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = ashr i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -232,9 +250,18 @@ entry:
}
define void @test_shl_i128_outofrange(i128 %x, ptr nocapture %r) nounwind {
-; ALL-LABEL: test_shl_i128_outofrange:
-; ALL: # %bb.0: # %entry
-; ALL-NEXT: ret{{[l|q]}}
+; i686-LABEL: test_shl_i128_outofrange:
+; i686: # %bb.0: # %entry
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: movl %ebp, %esp
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl
+;
+; x86_64-LABEL: test_shl_i128_outofrange:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: retq
entry:
%0 = shl i128 %x, -1
store i128 %0, ptr %r, align 16
@@ -874,26 +901,31 @@ define <2 x i256> @shl_zext_lshr_outofrange(<2 x i128> %a0) {
define i128 @lshr_shl_mask(i128 %a0) {
; i686-LABEL: lshr_shl_mask:
; i686: # %bb.0:
-; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %ebp
; i686-NEXT: .cfi_def_cfa_offset 8
+; i686-NEXT: .cfi_offset %ebp, -8
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: .cfi_def_cfa_register %ebp
+; i686-NEXT: pushl %edi
; i686-NEXT: pushl %esi
-; i686-NEXT: .cfi_def_cfa_offset 12
-; i686-NEXT: .cfi_offset %esi, -12
-; i686-NEXT: .cfi_offset %edi, -8
-; i686-NEXT: movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT: movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: .cfi_offset %esi, -16
+; i686-NEXT: .cfi_offset %edi, -12
+; i686-NEXT: movl 8(%ebp), %eax
+; i686-NEXT: movl 24(%ebp), %ecx
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
; i686-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF
-; i686-NEXT: andl {{[0-9]+}}(%esp), %edi
+; i686-NEXT: andl 36(%ebp), %edi
; i686-NEXT: movl %edi, 12(%eax)
; i686-NEXT: movl %esi, 8(%eax)
; i686-NEXT: movl %edx, 4(%eax)
; i686-NEXT: movl %ecx, (%eax)
+; i686-NEXT: leal -8(%ebp), %esp
; i686-NEXT: popl %esi
-; i686-NEXT: .cfi_def_cfa_offset 8
; i686-NEXT: popl %edi
-; i686-NEXT: .cfi_def_cfa_offset 4
+; i686-NEXT: popl %ebp
+; i686-NEXT: .cfi_def_cfa %esp, 4
; i686-NEXT: retl $4
;
; x86_64-LABEL: lshr_shl_mask:
diff --git a/llvm/test/CodeGen/X86/sincos-stack-args.ll b/llvm/test/CodeGen/X86/sincos-stack-args.ll
index fe1533083333..42c05a3e7a9b 100644
--- a/llvm/test/CodeGen/X86/sincos-stack-args.ll
+++ b/llvm/test/CodeGen/X86/sincos-stack-args.ll
@@ -7,11 +7,10 @@ declare double @g(double, double)
; Though not visible within the IR, this will lower to an FSINCOS node, with
; store users, that are within a (callseq_start, callseq_end) pair. In this
; case, the stores cannot be folded into the sincos call.
-define double @negative_sincos_with_stores_within_call_sequence(double %a) {
+define double @negative_sincos_with_stores_within_call_sequence(double %a) nounwind {
; CHECK-LABEL: negative_sincos_with_stores_within_call_sequence:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subl $44, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: fldl 48(%esp)
; CHECK-NEXT: leal 24(%esp), %eax
; CHECK-NEXT: movl %eax, 12(%esp)
@@ -27,7 +26,6 @@ define double @negative_sincos_with_stores_within_call_sequence(double %a) {
; CHECK-NEXT: fstpl (%esp)
; CHECK-NEXT: calll g@PLT
; CHECK-NEXT: addl $44, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
entry:
%0 = tail call double @llvm.sin.f64(double %a)
diff --git a/llvm/test/CodeGen/X86/sincos.ll b/llvm/test/CodeGen/X86/sincos.ll
index 42b1555e78b1..79034077f183 100644
--- a/llvm/test/CodeGen/X86/sincos.ll
+++ b/llvm/test/CodeGen/X86/sincos.ll
@@ -9,11 +9,10 @@ declare double @sin(double) readonly
declare x86_fp80 @sinl(x86_fp80) readonly
-define float @test1(float %X) {
+define float @test1(float %X) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: calll _sinf
@@ -23,11 +22,10 @@ define float @test1(float %X) {
ret float %Y
}
-define double @test2(double %X) {
+define double @test2(double %X) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
; CHECK-NEXT: calll _sin
@@ -37,11 +35,10 @@ define double @test2(double %X) {
ret double %Y
}
-define x86_fp80 @test3(x86_fp80 %X) {
+define x86_fp80 @test3(x86_fp80 %X) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $28, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: fldt {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpt (%esp)
; CHECK-NEXT: calll _sinl
@@ -58,11 +55,10 @@ declare double @cos(double) readonly
declare x86_fp80 @cosl(x86_fp80) readonly
-define float @test4(float %X) {
+define float @test4(float %X) nounwind {
; CHECK-LABEL: test4:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: calll _cosf
@@ -72,11 +68,10 @@ define float @test4(float %X) {
ret float %Y
}
-define double @test5(double %X) {
+define double @test5(double %X) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $12, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
; CHECK-NEXT: calll _cos
@@ -86,11 +81,10 @@ define double @test5(double %X) {
ret double %Y
}
-define x86_fp80 @test6(x86_fp80 %X) {
+define x86_fp80 @test6(x86_fp80 %X) nounwind {
; CHECK-LABEL: test6:
; CHECK: ## %bb.0:
; CHECK-NEXT: subl $28, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: fldt {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpt (%esp)
; CHECK-NEXT: calll _cosl
diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll
index 86891e964d96..509d4443e930 100644
--- a/llvm/test/CodeGen/X86/smax.ll
+++ b/llvm/test/CodeGen/X86/smax.ll
@@ -151,31 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %ebx
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl 28(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovll %ebx, %edx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: cmovll 24(%ebp), %ebx
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: cmovll %esi, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -717,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: cmovll %esi, %ecx
-; X86-NEXT: cmovll %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll
index 8907f6c4cd59..5e9fe27b41d2 100644
--- a/llvm/test/CodeGen/X86/smin.ll
+++ b/llvm/test/CodeGen/X86/smin.ll
@@ -151,32 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %edx, %ebx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: cmpl %ecx, 24(%ebp)
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovll %ebx, %edx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmovll %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: cmovll 24(%ebp), %ecx
+; X86-NEXT: cmovll 28(%ebp), %edx
+; X86-NEXT: cmovll 32(%ebp), %esi
+; X86-NEXT: cmovll %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -718,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: cmovll %esi, %ecx
-; X86-NEXT: cmovll %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/stack-align2.ll b/llvm/test/CodeGen/X86/stack-align2.ll
index 99f36d2ca8b7..095a9090ed08 100644
--- a/llvm/test/CodeGen/X86/stack-align2.ll
+++ b/llvm/test/CodeGen/X86/stack-align2.ll
@@ -3,13 +3,11 @@
; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
; RUN: llc < %s -mcpu=generic -mtriple=i386-pc-solaris2.11 | FileCheck %s -check-prefix=SOLARIS-I386
-; RUN: llc < %s -mcpu=generic -mtriple=i386-nacl | FileCheck %s -check-prefix=NACL-I386
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-kfreebsd | FileCheck %s -check-prefix=KFREEBSD-X86_64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-solaris2.11 | FileCheck %s -check-prefix=SOLARIS-X86_64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-nacl | FileCheck %s -check-prefix=NACL-X86_64
define i32 @test() nounwind {
entry:
@@ -19,7 +17,6 @@ entry:
; LINUX-I386: subl $12, %esp
; KFREEBSD-I386: subl $12, %esp
; DARWIN-I386: subl $12, %esp
-; NACL-I386: subl $12, %esp
; NETBSD-I386-NOT: subl {{.*}}, %esp
; SOLARIS-I386-NOT: subl {{.*}}, %esp
@@ -27,8 +24,6 @@ entry:
; LINUX-X86_64-NOT: subq {{.*}}, %rsp
; DARWIN-X86_64: pushq %{{.*}}
; DARWIN-X86_64-NOT: subq {{.*}}, %rsp
-; NACL-X86_64: pushq %{{.*}}
-; NACL-X86_64-NOT: subq {{.*}}, %rsp
; NETBSD-X86_64: pushq %{{.*}}
; NETBSD-X86_64-NOT: subq {{.*}}, %rsp
; SOLARIS-X86_64: pushq %{{.*}}
diff --git a/llvm/test/CodeGen/X86/stack-protector-target-openbsd.ll b/llvm/test/CodeGen/X86/stack-protector-target-openbsd.ll
new file mode 100644
index 000000000000..06382c6bbbbe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-protector-target-openbsd.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=i386--linux < %s | FileCheck -check-prefix=LINUX32 %s
+; RUN: llc -mtriple=x86_64--linux < %s | FileCheck -check-prefix=LINUX64 %s
+; RUN: llc -mtriple=i386--openbsd < %s | FileCheck -check-prefix=OPENBSD32 %s
+; RUN: llc -mtriple=x86_64--openbsd < %s | FileCheck -check-prefix=OPENBSD64 %s
+
+define void @func() sspreq nounwind {
+; LINUX32-LABEL: func:
+; LINUX32: # %bb.0:
+; LINUX32-NEXT: subl $12, %esp
+; LINUX32-NEXT: movl %gs:20, %eax
+; LINUX32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; LINUX32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; LINUX32-NEXT: movl %eax, (%esp)
+; LINUX32-NEXT: calll capture@PLT
+; LINUX32-NEXT: movl %gs:20, %eax
+; LINUX32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; LINUX32-NEXT: jne .LBB0_2
+; LINUX32-NEXT: # %bb.1: # %SP_return
+; LINUX32-NEXT: addl $12, %esp
+; LINUX32-NEXT: retl
+; LINUX32-NEXT: .LBB0_2: # %CallStackCheckFailBlk
+; LINUX32-NEXT: calll __stack_chk_fail@PLT
+;
+; LINUX64-LABEL: func:
+; LINUX64: # %bb.0:
+; LINUX64-NEXT: subq $24, %rsp
+; LINUX64-NEXT: movq %fs:40, %rax
+; LINUX64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; LINUX64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; LINUX64-NEXT: callq capture@PLT
+; LINUX64-NEXT: movq %fs:40, %rax
+; LINUX64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; LINUX64-NEXT: jne .LBB0_2
+; LINUX64-NEXT: # %bb.1: # %SP_return
+; LINUX64-NEXT: addq $24, %rsp
+; LINUX64-NEXT: retq
+; LINUX64-NEXT: .LBB0_2: # %CallStackCheckFailBlk
+; LINUX64-NEXT: callq __stack_chk_fail@PLT
+;
+; OPENBSD32-LABEL: func:
+; OPENBSD32: # %bb.0:
+; OPENBSD32-NEXT: subl $8, %esp
+; OPENBSD32-NEXT: movl __guard_local, %eax
+; OPENBSD32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; OPENBSD32-NEXT: movl %esp, %eax
+; OPENBSD32-NEXT: pushl %eax
+; OPENBSD32-NEXT: calll capture@PLT
+; OPENBSD32-NEXT: addl $4, %esp
+; OPENBSD32-NEXT: movl __guard_local, %eax
+; OPENBSD32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; OPENBSD32-NEXT: jne .LBB0_2
+; OPENBSD32-NEXT: # %bb.1: # %SP_return
+; OPENBSD32-NEXT: addl $8, %esp
+; OPENBSD32-NEXT: retl
+; OPENBSD32-NEXT: .LBB0_2: # %CallStackCheckFailBlk
+; OPENBSD32-NEXT: pushl $.LSSH
+; OPENBSD32-NEXT: calll __stack_smash_handler@PLT
+;
+; OPENBSD64-LABEL: func:
+; OPENBSD64: # %bb.0:
+; OPENBSD64-NEXT: subq $24, %rsp
+; OPENBSD64-NEXT: movq __guard_local(%rip), %rax
+; OPENBSD64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; OPENBSD64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; OPENBSD64-NEXT: callq capture@PLT
+; OPENBSD64-NEXT: movq __guard_local(%rip), %rax
+; OPENBSD64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; OPENBSD64-NEXT: jne .LBB0_2
+; OPENBSD64-NEXT: # %bb.1: # %SP_return
+; OPENBSD64-NEXT: addq $24, %rsp
+; OPENBSD64-NEXT: retq
+; OPENBSD64-NEXT: .LBB0_2: # %CallStackCheckFailBlk
+; OPENBSD64-NEXT: movl $.LSSH, %edi
+; OPENBSD64-NEXT: callq __stack_smash_handler@PLT
+ %alloca = alloca i32, align 4
+ call void @capture(ptr %alloca)
+ ret void
+}
+
+declare void @capture(ptr)
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 953a0d65c538..ab28a3b4a2b6 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -48,18 +48,17 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X86: ## %bb.0:
; CHECK-X86-NEXT: subl $12, %esp
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
-; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: setne %cl
-; CHECK-X86-NEXT: testl $263, %eax ## imm = 0x107
-; CHECK-X86-NEXT: setne %al
-; CHECK-X86-NEXT: testb %cl, %al
-; CHECK-X86-NEXT: jne LBB1_2
-; CHECK-X86-NEXT: ## %bb.1: ## %yes
-; CHECK-X86-NEXT: addl $12, %esp
-; CHECK-X86-NEXT: retl
-; CHECK-X86-NEXT: LBB1_2: ## %no
+; CHECK-X86-NEXT: sete %al
+; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107
+; CHECK-X86-NEXT: je LBB1_3
+; CHECK-X86-NEXT: ## %bb.1:
+; CHECK-X86-NEXT: testb %al, %al
+; CHECK-X86-NEXT: jne LBB1_3
+; CHECK-X86-NEXT: ## %bb.2: ## %no
; CHECK-X86-NEXT: calll _bar
+; CHECK-X86-NEXT: LBB1_3: ## %yes
; CHECK-X86-NEXT: addl $12, %esp
; CHECK-X86-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll
index 6a52acfe2fb3..7f17299b39e3 100644
--- a/llvm/test/CodeGen/X86/ucmp.ll
+++ b/llvm/test/CodeGen/X86/ucmp.ll
@@ -107,29 +107,33 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; X86-LABEL: ucmp.8.128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: cmpl %eax, 24(%ebp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 16(%ebp), %ebx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: setb %al
-; X86-NEXT: cmpl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl 8(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: sbbb $0, %al
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 5b1e0545502b..82dfeeee1329 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -153,26 +153,28 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrl %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %eax
-; X86-NEXT: movl %esp, %esi
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivti3
-; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 30a7f80b2315..3da5973f9f90 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -194,32 +194,34 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrl %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: shll $31, %eax
-; X86-NEXT: movl %esp, %esi
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 20(%ebp)
-; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivti3
-; X86-NEXT: addl $32, %esp
+; X86-NEXT: subl $4, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $-1, %eax
; X86-NEXT: movl $-1, %edx
; X86-NEXT: jne .LBB4_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB4_2:
; X86-NEXT: leal -4(%ebp), %esp
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index f589d4a7b04a..7ef859978cdb 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -232,31 +232,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmpl 24(%ebp), %ebx
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl 28(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 52(%ebp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovbl %ebx, %edx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: cmovbl 24(%ebp), %ebx
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: cmovbl 32(%ebp), %edx
+; X86-NEXT: cmovbl %esi, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -282,37 +285,40 @@ define i128 @test_i128_1(i128 %a) nounwind {
; X86-LABEL: test_i128_1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
; X86-NEXT: cmpl $1, %eax
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $1, %edi
-; X86-NEXT: cmovnel %eax, %edi
-; X86-NEXT: cmovel %ebx, %edi
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: negl %ebp
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovbl %eax, %ebp
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %ebx
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: cmovel %edi, %ebp
-; X86-NEXT: cmovel %edx, %ebx
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: cmpl $0, 28(%ebp)
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ecx, %ebx
+; X86-NEXT: movl $1, %ebx
+; X86-NEXT: cmovbl %eax, %ebx
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: cmovel %esi, %ebx
+; X86-NEXT: cmovel 28(%ebp), %edi
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1312,29 +1318,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index 7a5cdbb9ce75..c927abf3a426 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -147,32 +147,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %edx, %ebx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: cmpl %ecx, 24(%ebp)
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovbl %ebx, %edx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmovbl %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: cmovbl 24(%ebp), %ecx
+; X86-NEXT: cmovbl 28(%ebp), %edx
+; X86-NEXT: cmovbl 32(%ebp), %esi
+; X86-NEXT: cmovbl %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -727,29 +729,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%ax = ashr i128 %a, 64
%bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4c3170304b98..89afd1b00444 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -38,8 +38,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: .cfi_def_cfa_offset 44
+; X86-NEXT: subl $28, %esp
+; X86-NEXT: .cfi_def_cfa_offset 48
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
@@ -147,7 +147,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir b/llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir
index 1bdbc5386266..232a5e3353b2 100644
--- a/llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir
+++ b/llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-- %s -o - -run-pass=processimpdefs -run-pass=unreachable-mbb-elimination | FileCheck %s
+# RUN: llc -mtriple=x86_64-- %s -o - -passes=process-imp-defs,unreachable-mbb-elimination | FileCheck %s
---
name: f
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index a15d633d8538..12dccca76eb1 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -92,6 +92,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -101,15 +103,15 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: jge .LBB4_2
+; CHECK-NEXT: jge .LBB4_3
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB4_2: # %bb2
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: jmp .LBB4_2
+; CHECK-NEXT: .LBB4_3: # %bb2
; CHECK-NEXT: movl $2, %eax
+; CHECK-NEXT: .LBB4_2: # %bb1
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll b/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll
new file mode 100644
index 000000000000..5ac90a0af2e5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck -check-prefix=CHECK64 %s
+
+define i64 @test_sdiv_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_sdiv_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __alldiv
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_sdiv_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: cqto
+; CHECK64-NEXT: idivq %r8
+; CHECK64-NEXT: retq
+ %ret = sdiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_srem_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_srem_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __allrem
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_srem_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: cqto
+; CHECK64-NEXT: idivq %r8
+; CHECK64-NEXT: movq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = srem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_udiv_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_udiv_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __aulldiv
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_udiv_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: xorl %edx, %edx
+; CHECK64-NEXT: divq %r8
+; CHECK64-NEXT: retq
+ %ret = udiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_urem_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_urem_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __aullrem
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_urem_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: xorl %edx, %edx
+; CHECK64-NEXT: divq %r8
+; CHECK64-NEXT: movq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = urem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_mul_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_mul_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %esi
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: movl %ecx, %eax
+; CHECK32-NEXT: mull %esi
+; CHECK32-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: addl %ecx, %edx
+; CHECK32-NEXT: imull {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: addl %esi, %edx
+; CHECK32-NEXT: popl %esi
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_mul_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: imulq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = mul i64 %a, %b
+ ret i64 %ret
+}
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 020004def6e7..62c63d5defe6 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -2,14 +2,6 @@
; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign -verify-machineinstrs < %s | FileCheck -check-prefix=X32ABI %s
-; This should run with NaCl as well ( -mtriple=x86_64-pc-nacl ) but currently doesn't due to PR22655
-
-; Make sure the correct register gets set up as the base pointer
-; This should be rbx for x64 and 64-bit NaCl and ebx for x32
-; NACL-LABEL: base
-; NACL: subq $32, %rsp
-; NACL: movq %rsp, %rbx
-
declare i32 @helper() nounwind
define void @base() #0 {
; CHECK-LABEL: base:
diff --git a/llvm/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll b/llvm/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll
index bceebdc9ad7d..26be80ea5894 100644
--- a/llvm/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll
@@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-nacl < %s | FileCheck -check-prefix=NACL %s
; x32 uses %esp, %ebp as stack and frame pointers
@@ -14,12 +13,6 @@
; X32ABI: movl %esp, %ebp
; X32ABI: movl %edi, -4(%ebp)
; X32ABI: popq %rbp
-; NACL-LABEL: foo
-; NACL: pushq %rbp
-; NACL: movq %rsp, %rbp
-; NACL: movl %edi, -4(%rbp)
-; NACL: popq %rbp
-
define void @foo(ptr %a) #0 {
entry:
@@ -30,5 +23,3 @@ entry:
}
attributes #0 = { nounwind uwtable "frame-pointer"="all"}
-
-