diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/i128-mul.ll | 178 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/loop-strength-reduce5.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/madd.ll | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr49451.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll | 114 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/xor.ll | 132 |
8 files changed, 248 insertions, 244 deletions
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll index 06cf968512db..8a8e7a3b4df2 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll @@ -297,30 +297,30 @@ define dso_local void @test6(i16 signext %0) nounwind { ; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movl $buf, %ecx -; CHECK-NEXT: movl $32, %edx -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl $buf, %edx +; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: jmp .LBB5_1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB5_3: # %if.false ; CHECK-NEXT: # in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: decl %esi +; CHECK-NEXT: decl %eax ; CHECK-NEXT: .LBB5_4: # %loop.bb2 ; CHECK-NEXT: # in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: leal (%rdi,%rsi), %r8d +; CHECK-NEXT: leal (%rdi,%rax), %r8d ; CHECK-NEXT: movw %r8w, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: cmpw $7, %si +; CHECK-NEXT: cmpw $7, %ax ; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) ; CHECK-NEXT: tilezero %tmm0 -; CHECK-NEXT: tilestored %tmm0, (%rcx,%rdx) +; CHECK-NEXT: tilestored %tmm0, (%rdx,%rsi) ; CHECK-NEXT: jne .LBB5_5 ; CHECK-NEXT: .LBB5_1: # %loop.bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne .LBB5_3 ; CHECK-NEXT: # %bb.2: # %if.true ; CHECK-NEXT: # in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: incl %esi +; CHECK-NEXT: incl %eax ; CHECK-NEXT: jmp .LBB5_4 ; CHECK-NEXT: .LBB5_5: # %exit ; CHECK-NEXT: tilerelease diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll index 477a0dce5c81..cffd88c55bb0 100644 --- a/llvm/test/CodeGen/X86/i128-mul.ll +++ b/llvm/test/CodeGen/X86/i128-mul.ll @@ -111,63 +111,62 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X86-NOBMI-NEXT: orl %ecx, %eax ; X86-NOBMI-NEXT: je .LBB1_3 ; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: xorl %ecx, %ecx -; X86-NOBMI-NEXT: xorl %edi, %edi -; X86-NOBMI-NEXT: xorl %ebp, %ebp +; X86-NOBMI-NEXT: movl $0, (%esp) # 4-byte Folded Spill ; X86-NOBMI-NEXT: .p2align 4 ; X86-NOBMI-NEXT: .LBB1_2: # %for.body ; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl (%eax,%edi,8), %ebp -; X86-NOBMI-NEXT: movl 4(%eax,%edi,8), %ebx +; X86-NOBMI-NEXT: movl (%eax,%ecx,8), %edi +; X86-NOBMI-NEXT: movl 4(%eax,%ecx,8), %ebx ; X86-NOBMI-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOBMI-NEXT: movl %ebp, %eax -; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: movl %edx, %ebp ; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOBMI-NEXT: movl %ebx, %eax -; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %eax, %ebx -; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NOBMI-NEXT: adcl $0, %edx -; X86-NOBMI-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl %ebp, %eax +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: movl %edx, %ebx +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: addl %ebp, %esi +; X86-NOBMI-NEXT: adcl $0, %ebx +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: mull %edx -; X86-NOBMI-NEXT: movl %eax, %ebp -; X86-NOBMI-NEXT: addl %ebx, %ebp -; X86-NOBMI-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOBMI-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-NOBMI-NEXT: movl %edx, %ebx -; X86-NOBMI-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-NOBMI-NEXT: movl %edx, %ebp +; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: addl %esi, %edi +; X86-NOBMI-NEXT: adcl %ebx, %ebp +; X86-NOBMI-NEXT: setb %bl ; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %eax, %esi -; X86-NOBMI-NEXT: addl %ebx, %esi -; X86-NOBMI-NEXT: movl %ecx, %eax -; X86-NOBMI-NEXT: movzbl (%esp), %ebx # 1-byte Folded Reload -; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: adcl %ebx, %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NOBMI-NEXT: adcl %eax, %ebp -; X86-NOBMI-NEXT: adcl $0, %esi -; X86-NOBMI-NEXT: adcl $0, %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl %edx, (%eax,%edi,8) -; X86-NOBMI-NEXT: movl %ebp, 4(%eax,%edi,8) -; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: addl $1, %edi -; X86-NOBMI-NEXT: adcl $0, %ebp -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: xorl %edx, %eax -; X86-NOBMI-NEXT: movl %ebp, %edx -; X86-NOBMI-NEXT: xorl %ebx, %edx -; X86-NOBMI-NEXT: orl %eax, %edx +; X86-NOBMI-NEXT: addl %ebp, %eax +; X86-NOBMI-NEXT: movzbl %bl, %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NOBMI-NEXT: adcl %esi, %edx +; X86-NOBMI-NEXT: movl %ecx, %ebx +; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl $0, %eax +; X86-NOBMI-NEXT: adcl $0, %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %ecx, (%esi,%ebx,8) +; X86-NOBMI-NEXT: movl %ebx, %ecx +; X86-NOBMI-NEXT: movl %edi, 4(%esi,%ebx,8) +; X86-NOBMI-NEXT: addl $1, %ecx +; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-NOBMI-NEXT: adcl $0, %edi +; X86-NOBMI-NEXT: movl %ecx, %esi +; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NOBMI-NEXT: xorl %ebp, %edi +; X86-NOBMI-NEXT: orl %esi, %edi ; X86-NOBMI-NEXT: jne .LBB1_2 ; X86-NOBMI-NEXT: .LBB1_3: # %for.end ; X86-NOBMI-NEXT: xorl %eax, %eax @@ -185,66 +184,71 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X86-BMI-NEXT: pushl %ebx ; X86-BMI-NEXT: pushl %edi ; X86-BMI-NEXT: pushl %esi -; X86-BMI-NEXT: subl $16, %esp +; X86-BMI-NEXT: subl $20, %esp ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: orl %ecx, %eax ; X86-BMI-NEXT: je .LBB1_3 ; X86-BMI-NEXT: # %bb.1: # %for.body.preheader -; X86-BMI-NEXT: xorl %esi, %esi -; X86-BMI-NEXT: xorl %edi, %edi +; X86-BMI-NEXT: xorl %ecx, %ecx +; X86-BMI-NEXT: xorl %eax, %eax ; X86-BMI-NEXT: xorl %ebx, %ebx -; X86-BMI-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-BMI-NEXT: xorl %ebp, %ebp ; X86-BMI-NEXT: .p2align 4 ; X86-BMI-NEXT: .LBB1_2: # %for.body ; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: movl (%eax,%ebx,8), %ecx -; X86-BMI-NEXT: movl 4(%eax,%ebx,8), %ebp -; X86-BMI-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl 4(%eax,%ebx,8), %esi +; X86-BMI-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %edx, %eax -; X86-BMI-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: mulxl %eax, %edx, %edi ; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-BMI-NEXT: movl %ebp, %edx -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %ebp -; X86-BMI-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %esi, %edx +; X86-BMI-NEXT: mulxl %eax, %esi, %eax +; X86-BMI-NEXT: addl %edi, %esi +; X86-BMI-NEXT: adcl $0, %eax ; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI-NEXT: addl %eax, %ecx -; X86-BMI-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %esi, %eax -; X86-BMI-NEXT: adcl %ebp, %edx -; X86-BMI-NEXT: movl %edx, %ebp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: mulxl %ecx, %edi, %ebp +; X86-BMI-NEXT: addl %esi, %edi +; X86-BMI-NEXT: adcl %eax, %ebp ; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %esi, %edi +; X86-BMI-NEXT: mulxl %ecx, %ecx, %eax ; X86-BMI-NEXT: setb %dl -; X86-BMI-NEXT: addl %ebp, %esi +; X86-BMI-NEXT: addl %ebp, %ecx +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI-NEXT: movzbl %dl, %edx -; X86-BMI-NEXT: adcl %edx, %edi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-BMI-NEXT: addl %eax, %edx -; X86-BMI-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload -; X86-BMI-NEXT: adcl $0, %esi -; X86-BMI-NEXT: adcl $0, %edi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl %edx, (%eax,%ebx,8) -; X86-BMI-NEXT: movl %ecx, 4(%eax,%ebx,8) -; X86-BMI-NEXT: addl $1, %ebx -; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-BMI-NEXT: adcl %edx, %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-BMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-BMI-NEXT: adcl (%esp), %edi # 4-byte Folded Reload ; X86-BMI-NEXT: adcl $0, %ecx -; X86-BMI-NEXT: movl %ebx, %eax -; X86-BMI-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-BMI-NEXT: xorl %ebp, %ecx -; X86-BMI-NEXT: orl %eax, %ecx +; X86-BMI-NEXT: adcl $0, %edx +; X86-BMI-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl %eax, (%edx,%ebx,8) +; X86-BMI-NEXT: movl %edi, 4(%edx,%ebx,8) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: addl $1, %ebx +; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %ebx, %edx +; X86-BMI-NEXT: xorl %esi, %edx +; X86-BMI-NEXT: movl %ebp, %esi +; X86-BMI-NEXT: xorl %edi, %esi +; X86-BMI-NEXT: orl %edx, %esi +; X86-BMI-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-BMI-NEXT: jne .LBB1_2 ; X86-BMI-NEXT: .LBB1_3: # %for.end ; X86-BMI-NEXT: xorl %eax, %eax ; X86-BMI-NEXT: xorl %edx, %edx -; X86-BMI-NEXT: addl $16, %esp +; X86-BMI-NEXT: addl $20, %esp ; X86-BMI-NEXT: popl %esi ; X86-BMI-NEXT: popl %edi ; X86-BMI-NEXT: popl %ebx @@ -257,12 +261,11 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X64-NOBMI-NEXT: je .LBB1_3 ; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader ; X64-NOBMI-NEXT: movq %rdx, %r8 -; X64-NOBMI-NEXT: xorl %edx, %edx +; X64-NOBMI-NEXT: xorl %r10d, %r10d ; X64-NOBMI-NEXT: xorl %r9d, %r9d ; X64-NOBMI-NEXT: .p2align 4 ; X64-NOBMI-NEXT: .LBB1_2: # %for.body ; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI-NEXT: movq %rdx, %r10 ; X64-NOBMI-NEXT: movq %rcx, %rax ; X64-NOBMI-NEXT: mulq (%r8,%r9,8) ; X64-NOBMI-NEXT: addq %r10, %rax @@ -270,6 +273,7 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X64-NOBMI-NEXT: movq %rax, (%rsi,%r9,8) ; X64-NOBMI-NEXT: incq %r9 ; X64-NOBMI-NEXT: cmpq %r9, %rdi +; X64-NOBMI-NEXT: movq %rdx, %r10 ; X64-NOBMI-NEXT: jne .LBB1_2 ; X64-NOBMI-NEXT: .LBB1_3: # %for.end ; X64-NOBMI-NEXT: xorl %eax, %eax @@ -281,12 +285,11 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X64-BMI-NEXT: je .LBB1_3 ; X64-BMI-NEXT: # %bb.1: # %for.body.preheader ; X64-BMI-NEXT: movq %rdx, %rax -; X64-BMI-NEXT: xorl %edx, %edx +; X64-BMI-NEXT: xorl %r9d, %r9d ; X64-BMI-NEXT: xorl %r8d, %r8d ; X64-BMI-NEXT: .p2align 4 ; X64-BMI-NEXT: .LBB1_2: # %for.body ; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI-NEXT: movq %rdx, %r9 ; X64-BMI-NEXT: movq %rcx, %rdx ; X64-BMI-NEXT: mulxq (%rax,%r8,8), %r10, %rdx ; X64-BMI-NEXT: addq %r9, %r10 @@ -294,6 +297,7 @@ define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind { ; X64-BMI-NEXT: movq %r10, (%rsi,%r8,8) ; X64-BMI-NEXT: incq %r8 ; X64-BMI-NEXT: cmpq %r8, %rdi +; X64-BMI-NEXT: movq %rdx, %r9 ; X64-BMI-NEXT: jne .LBB1_2 ; X64-BMI-NEXT: .LBB1_3: # %for.end ; X64-BMI-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce5.ll b/llvm/test/CodeGen/X86/loop-strength-reduce5.ll index ebae51fa2aa4..08003739b55d 100644 --- a/llvm/test/CodeGen/X86/loop-strength-reduce5.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce5.ll @@ -16,11 +16,11 @@ define void @foo(i32 %N) nounwind { ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %bb ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movw %dx, X -; CHECK-NEXT: movw %cx, Y -; CHECK-NEXT: incl %edx -; CHECK-NEXT: addl $4, %ecx -; CHECK-NEXT: cmpl %edx, %eax +; CHECK-NEXT: movw %cx, X +; CHECK-NEXT: movw %dx, Y +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: addl $4, %edx +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_3: # %return ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 2a2a4a5ca18d..209ee79d5141 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -1480,15 +1480,15 @@ define i32 @test_unsigned_short_512(ptr nocapture readonly, ptr nocapture readon ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm2, %ymm4, %ymm2 -; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: addq $16, %rcx ; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB10_1 ; AVX2-NEXT: # %bb.2: # %middle.block -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] @@ -1728,10 +1728,10 @@ define i32 @test_unsigned_short_1024(ptr nocapture readonly, ptr nocapture reado ; AVX2-NEXT: vpaddd %ymm2, %ymm4, %ymm2 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm5, %ymm4, %ymm4 -; AVX2-NEXT: vpaddd %ymm1, %ymm4, %ymm1 +; AVX2-NEXT: vpaddd %ymm0, %ymm4, %ymm0 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm6, %ymm4, %ymm4 -; AVX2-NEXT: vpaddd %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpaddd %ymm1, %ymm4, %ymm1 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm7, %ymm4, %ymm4 ; AVX2-NEXT: vpaddd %ymm3, %ymm4, %ymm3 @@ -1739,9 +1739,9 @@ define i32 @test_unsigned_short_1024(ptr nocapture readonly, ptr nocapture reado ; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB11_1 ; AVX2-NEXT: # %bb.2: # %middle.block -; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] @@ -1765,15 +1765,15 @@ define i32 @test_unsigned_short_1024(ptr nocapture readonly, ptr nocapture reado ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512-NEXT: vpmulld %zmm2, %zmm4, %zmm2 -; AVX512-NEXT: vpaddd %zmm1, %zmm2, %zmm1 +; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512-NEXT: vpmulld %zmm3, %zmm2, %zmm2 -; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpaddd %zmm1, %zmm2, %zmm1 ; AVX512-NEXT: addq $16, %rcx ; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB11_1 ; AVX512-NEXT: # %bb.2: # %middle.block -; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/pr49451.ll b/llvm/test/CodeGen/X86/pr49451.ll index 173c41140ebe..1a7551f6117e 100644 --- a/llvm/test/CodeGen/X86/pr49451.ll +++ b/llvm/test/CodeGen/X86/pr49451.ll @@ -18,15 +18,15 @@ define void @func_6(i8 %uc_8, i64 %uli_10) nounwind { ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB0_1: # %for.body612 ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: testb %dl, %dl +; X86-NEXT: testb %bl, %bl ; X86-NEXT: je .LBB0_2 ; X86-NEXT: # %bb.3: # %if.end1401 ; X86-NEXT: # in Loop: Header=BB0_1 Depth=1 ; X86-NEXT: addl %eax, %esi ; X86-NEXT: movw %si, s_2 -; X86-NEXT: movw %bx, s_0 +; X86-NEXT: movw %dx, s_0 ; X86-NEXT: incl %ecx -; X86-NEXT: incl %ebx +; X86-NEXT: incl %edx ; X86-NEXT: cmpw $73, %cx ; X86-NEXT: jl .LBB0_1 ; X86-NEXT: # %bb.4: # %for.body1703 diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll index 1c3d27fac420..c0962236f93d 100644 --- a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -12729,43 +12729,43 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; FALLBACK9-NEXT: pushq %rbx ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0 ; FALLBACK9-NEXT: vmovups 32(%rdi), %ymm1 -; FALLBACK9-NEXT: movl (%rsi), %edi +; FALLBACK9-NEXT: movl (%rsi), %eax ; FALLBACK9-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; FALLBACK9-NEXT: leal (,%rdi,8), %ecx +; FALLBACK9-NEXT: leal (,%rax,8), %ecx ; FALLBACK9-NEXT: andl $56, %ecx -; FALLBACK9-NEXT: andl $56, %edi -; FALLBACK9-NEXT: movq -96(%rsp,%rdi), %rsi -; FALLBACK9-NEXT: movq -104(%rsp,%rdi), %r9 -; FALLBACK9-NEXT: movq %r9, %rax -; FALLBACK9-NEXT: shrdq %cl, %rsi, %rax -; FALLBACK9-NEXT: movq -112(%rsp,%rdi), %r10 +; FALLBACK9-NEXT: andl $56, %eax +; FALLBACK9-NEXT: movq -96(%rsp,%rax), %rdi +; FALLBACK9-NEXT: movq -104(%rsp,%rax), %r9 +; FALLBACK9-NEXT: movq %r9, %rsi +; FALLBACK9-NEXT: shrdq %cl, %rdi, %rsi +; FALLBACK9-NEXT: movq -112(%rsp,%rax), %r10 ; FALLBACK9-NEXT: movq %r10, %r8 ; FALLBACK9-NEXT: shrdq %cl, %r9, %r8 -; FALLBACK9-NEXT: movq -80(%rsp,%rdi), %r9 -; FALLBACK9-NEXT: movq -88(%rsp,%rdi), %r11 +; FALLBACK9-NEXT: movq -80(%rsp,%rax), %r9 +; FALLBACK9-NEXT: movq -88(%rsp,%rax), %r11 ; FALLBACK9-NEXT: movq %r11, %rbx ; FALLBACK9-NEXT: shrdq %cl, %r9, %rbx -; FALLBACK9-NEXT: shrdq %cl, %r11, %rsi -; FALLBACK9-NEXT: movq -72(%rsp,%rdi), %r11 +; FALLBACK9-NEXT: shrdq %cl, %r11, %rdi +; FALLBACK9-NEXT: movq -72(%rsp,%rax), %r11 ; FALLBACK9-NEXT: shrdq %cl, %r11, %r9 -; FALLBACK9-NEXT: movq -128(%rsp,%rdi), %r14 -; FALLBACK9-NEXT: movq -120(%rsp,%rdi), %rdi -; FALLBACK9-NEXT: movq %rdi, %r15 +; FALLBACK9-NEXT: movq -128(%rsp,%rax), %r14 +; FALLBACK9-NEXT: movq -120(%rsp,%rax), %rax +; FALLBACK9-NEXT: movq %rax, %r15 ; FALLBACK9-NEXT: shrdq %cl, %r10, %r15 -; FALLBACK9-NEXT: shrdq %cl, %rdi, %r14 +; FALLBACK9-NEXT: shrdq %cl, %rax, %r14 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx ; FALLBACK9-NEXT: shrq %cl, %r11 ; FALLBACK9-NEXT: movq %r15, 8(%rdx) ; FALLBACK9-NEXT: movq %r9, 48(%rdx) ; FALLBACK9-NEXT: movq %r11, 56(%rdx) -; FALLBACK9-NEXT: movq %rsi, 32(%rdx) +; FALLBACK9-NEXT: movq %rdi, 32(%rdx) ; FALLBACK9-NEXT: movq %rbx, 40(%rdx) ; FALLBACK9-NEXT: movq %r8, 16(%rdx) -; FALLBACK9-NEXT: movq %rax, 24(%rdx) +; FALLBACK9-NEXT: movq %rsi, 24(%rdx) ; FALLBACK9-NEXT: movq %r14, (%rdx) ; FALLBACK9-NEXT: popq %rbx ; FALLBACK9-NEXT: popq %r14 @@ -12906,45 +12906,45 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; FALLBACK12-NEXT: pushq %rbx ; FALLBACK12-NEXT: pushq %rax ; FALLBACK12-NEXT: vmovups (%rdi), %zmm0 -; FALLBACK12-NEXT: movl (%rsi), %r10d +; FALLBACK12-NEXT: movl (%rsi), %r9d ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; FALLBACK12-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp) ; FALLBACK12-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) -; FALLBACK12-NEXT: leal (,%r10,8), %eax +; FALLBACK12-NEXT: leal (,%r9,8), %eax ; FALLBACK12-NEXT: andl $56, %eax -; FALLBACK12-NEXT: andl $56, %r10d -; FALLBACK12-NEXT: movq -128(%rsp,%r10), %r9 -; FALLBACK12-NEXT: movq -120(%rsp,%r10), %r8 +; FALLBACK12-NEXT: andl $56, %r9d +; FALLBACK12-NEXT: movq -128(%rsp,%r9), %r10 +; FALLBACK12-NEXT: movq -120(%rsp,%r9), %r8 ; FALLBACK12-NEXT: movl %eax, %ecx -; FALLBACK12-NEXT: shrq %cl, %r9 +; FALLBACK12-NEXT: shrq %cl, %r10 ; FALLBACK12-NEXT: movl %eax, %esi ; FALLBACK12-NEXT: notb %sil ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi ; FALLBACK12-NEXT: movl %esi, %ecx ; FALLBACK12-NEXT: shlq %cl, %rdi -; FALLBACK12-NEXT: orq %r9, %rdi -; FALLBACK12-NEXT: movq -104(%rsp,%r10), %r9 -; FALLBACK12-NEXT: movq %r9, %rbx +; FALLBACK12-NEXT: orq %r10, %rdi +; FALLBACK12-NEXT: movq -104(%rsp,%r9), %r10 +; FALLBACK12-NEXT: movq %r10, %rbx ; FALLBACK12-NEXT: movl %eax, %ecx ; FALLBACK12-NEXT: shrq %cl, %rbx -; FALLBACK12-NEXT: movq -96(%rsp,%r10), %r12 +; FALLBACK12-NEXT: movq -96(%rsp,%r9), %r12 ; FALLBACK12-NEXT: leaq (%r12,%r12), %r11 ; FALLBACK12-NEXT: movl %esi, %ecx ; FALLBACK12-NEXT: shlq %cl, %r11 ; FALLBACK12-NEXT: orq %rbx, %r11 -; FALLBACK12-NEXT: movq -112(%rsp,%r10), %rbx +; FALLBACK12-NEXT: movq -112(%rsp,%r9), %rbx ; FALLBACK12-NEXT: movq %rbx, %r14 ; FALLBACK12-NEXT: movl %eax, %ecx ; FALLBACK12-NEXT: shrq %cl, %r14 -; FALLBACK12-NEXT: addq %r9, %r9 +; FALLBACK12-NEXT: addq %r10, %r10 ; FALLBACK12-NEXT: movl %esi, %ecx -; FALLBACK12-NEXT: shlq %cl, %r9 -; FALLBACK12-NEXT: orq %r14, %r9 -; FALLBACK12-NEXT: movq -88(%rsp,%r10), %r14 +; FALLBACK12-NEXT: shlq %cl, %r10 +; FALLBACK12-NEXT: orq %r14, %r10 +; FALLBACK12-NEXT: movq -88(%rsp,%r9), %r14 ; FALLBACK12-NEXT: movq %r14, %r13 ; FALLBACK12-NEXT: movl %eax, %ecx ; FALLBACK12-NEXT: shrq %cl, %r13 -; FALLBACK12-NEXT: movq -80(%rsp,%r10), %rbp +; FALLBACK12-NEXT: movq -80(%rsp,%r9), %rbp ; FALLBACK12-NEXT: leaq (%rbp,%rbp), %r15 ; FALLBACK12-NEXT: movl %esi, %ecx ; FALLBACK12-NEXT: shlq %cl, %r15 @@ -12957,8 +12957,8 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; FALLBACK12-NEXT: orq %r12, %r14 ; FALLBACK12-NEXT: movl %eax, %ecx ; FALLBACK12-NEXT: shrq %cl, %rbp -; FALLBACK12-NEXT: movq -72(%rsp,%r10), %r10 -; FALLBACK12-NEXT: leaq (%r10,%r10), %r12 +; FALLBACK12-NEXT: movq -72(%rsp,%r9), %r9 +; FALLBACK12-NEXT: leaq (%r9,%r9), %r12 ; FALLBACK12-NEXT: movl %esi, %ecx ; FALLBACK12-NEXT: shlq %cl, %r12 ; FALLBACK12-NEXT: orq %rbp, %r12 @@ -12969,13 +12969,13 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; FALLBACK12-NEXT: shlq %cl, %rbx ; FALLBACK12-NEXT: orq %r8, %rbx ; FALLBACK12-NEXT: movl %eax, %ecx -; FALLBACK12-NEXT: shrq %cl, %r10 -; FALLBACK12-NEXT: movq %r10, 56(%rdx) +; FALLBACK12-NEXT: shrq %cl, %r9 +; FALLBACK12-NEXT: movq %r9, 56(%rdx) ; FALLBACK12-NEXT: movq %rbx, 8(%rdx) ; FALLBACK12-NEXT: movq %r12, 48(%rdx) ; FALLBACK12-NEXT: movq %r14, 32(%rdx) ; FALLBACK12-NEXT: movq %r15, 40(%rdx) -; FALLBACK12-NEXT: movq %r9, 16(%rdx) +; FALLBACK12-NEXT: movq %r10, 16(%rdx) ; FALLBACK12-NEXT: movq %r11, 24(%rdx) ; FALLBACK12-NEXT: movq %rdi, (%rdx) ; FALLBACK12-NEXT: addq $8, %rsp @@ -13111,40 +13111,40 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; FALLBACK15-NEXT: pushq %r14 ; FALLBACK15-NEXT: pushq %rbx ; FALLBACK15-NEXT: vmovups (%rdi), %zmm0 -; FALLBACK15-NEXT: movl (%rsi), %edi +; FALLBACK15-NEXT: movl (%rsi), %eax ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; FALLBACK15-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp) ; FALLBACK15-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) -; FALLBACK15-NEXT: leal (,%rdi,8), %ecx +; FALLBACK15-NEXT: leal (,%rax,8), %ecx ; FALLBACK15-NEXT: andl $56, %ecx -; FALLBACK15-NEXT: andl $56, %edi -; FALLBACK15-NEXT: movq -96(%rsp,%rdi), %rsi -; FALLBACK15-NEXT: movq -104(%rsp,%rdi), %r9 -; FALLBACK15-NEXT: movq %r9, %rax -; FALLBACK15-NEXT: shrdq %cl, %rsi, %rax -; FALLBACK15-NEXT: movq -112(%rsp,%rdi), %r10 +; FALLBACK15-NEXT: andl $56, %eax +; FALLBACK15-NEXT: movq -96(%rsp,%rax), %rdi +; FALLBACK15-NEXT: movq -104(%rsp,%rax), %r9 +; FALLBACK15-NEXT: movq %r9, %rsi +; FALLBACK15-NEXT: shrdq %cl, %rdi, %rsi +; FALLBACK15-NEXT: movq -112(%rsp,%rax), %r10 ; FALLBACK15-NEXT: movq %r10, %r8 ; FALLBACK15-NEXT: shrdq %cl, %r9, %r8 -; FALLBACK15-NEXT: movq -80(%rsp,%rdi), %r9 -; FALLBACK15-NEXT: movq -88(%rsp,%rdi), %r11 +; FALLBACK15-NEXT: movq -80(%rsp,%rax), %r9 +; FALLBACK15-NEXT: movq -88(%rsp,%rax), %r11 ; FALLBACK15-NEXT: movq %r11, %rbx ; FALLBACK15-NEXT: shrdq %cl, %r9, %rbx -; FALLBACK15-NEXT: shrdq %cl, %r11, %rsi -; FALLBACK15-NEXT: movq -72(%rsp,%rdi), %r11 +; FALLBACK15-NEXT: shrdq %cl, %r11, %rdi +; FALLBACK15-NEXT: movq -72(%rsp,%rax), %r11 ; FALLBACK15-NEXT: shrdq %cl, %r11, %r9 -; FALLBACK15-NEXT: movq -128(%rsp,%rdi), %r14 -; FALLBACK15-NEXT: movq -120(%rsp,%rdi), %rdi -; FALLBACK15-NEXT: movq %rdi, %r15 +; FALLBACK15-NEXT: movq -128(%rsp,%rax), %r14 +; FALLBACK15-NEXT: movq -120(%rsp,%rax), %rax +; FALLBACK15-NEXT: movq %rax, %r15 ; FALLBACK15-NEXT: shrdq %cl, %r10, %r15 ; FALLBACK15-NEXT: shrxq %rcx, %r11, %r10 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx -; FALLBACK15-NEXT: shrdq %cl, %rdi, %r14 +; FALLBACK15-NEXT: shrdq %cl, %rax, %r14 ; FALLBACK15-NEXT: movq %r15, 8(%rdx) ; FALLBACK15-NEXT: movq %r9, 48(%rdx) -; FALLBACK15-NEXT: movq %rsi, 32(%rdx) +; FALLBACK15-NEXT: movq %rdi, 32(%rdx) ; FALLBACK15-NEXT: movq %rbx, 40(%rdx) ; FALLBACK15-NEXT: movq %r8, 16(%rdx) -; FALLBACK15-NEXT: movq %rax, 24(%rdx) +; FALLBACK15-NEXT: movq %rsi, 24(%rdx) ; FALLBACK15-NEXT: movq %r14, (%rdx) ; FALLBACK15-NEXT: movq %r10, 56(%rdx) ; FALLBACK15-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 9fbbba2ed3b4..37620ecf8c1b 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1185,10 +1185,10 @@ define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 { ; ENABLE-NEXT: .p2align 4 ; ENABLE-NEXT: LBB14_2: ## %for.body ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: cmpl %esi, %edi +; ENABLE-NEXT: setl %al ; ENABLE-NEXT: xorl %esi, %esi -; ENABLE-NEXT: cmpl %eax, %edi -; ENABLE-NEXT: setl %sil +; ENABLE-NEXT: movb %al, %sil ; ENABLE-NEXT: incb %dl ; ENABLE-NEXT: cmpb $45, %dl ; ENABLE-NEXT: jl LBB14_2 @@ -1220,10 +1220,10 @@ define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 { ; DISABLE-NEXT: .p2align 4 ; DISABLE-NEXT: LBB14_2: ## %for.body ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: cmpl %esi, %edi +; DISABLE-NEXT: setl %al ; DISABLE-NEXT: xorl %esi, %esi -; DISABLE-NEXT: cmpl %eax, %edi -; DISABLE-NEXT: setl %sil +; DISABLE-NEXT: movb %al, %sil ; DISABLE-NEXT: incb %dl ; DISABLE-NEXT: cmpb $45, %dl ; DISABLE-NEXT: jl LBB14_2 diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll index 59fbf7183abc..2bef66825d8c 100644 --- a/llvm/test/CodeGen/X86/xor.ll +++ b/llvm/test/CodeGen/X86/xor.ll @@ -62,12 +62,12 @@ define i32 @test4(i32 %a, i32 %b) nounwind { ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB3_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ecx, %edx ; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: andl %edx, %ecx -; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: jne .LBB3_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: retl @@ -78,12 +78,12 @@ define i32 @test4(i32 %a, i32 %b) nounwind { ; X64-LIN-NEXT: .p2align 4 ; X64-LIN-NEXT: .LBB3_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: movl %esi, %ecx ; X64-LIN-NEXT: xorl %esi, %eax -; X64-LIN-NEXT: movl %eax, %esi -; X64-LIN-NEXT: notl %esi -; X64-LIN-NEXT: andl %ecx, %esi -; X64-LIN-NEXT: addl %esi, %esi +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB3_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: retq @@ -94,12 +94,12 @@ define i32 @test4(i32 %a, i32 %b) nounwind { ; X64-WIN-NEXT: .p2align 4 ; X64-WIN-NEXT: .LBB3_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: movl %edx, %ecx ; X64-WIN-NEXT: xorl %edx, %eax -; X64-WIN-NEXT: movl %eax, %edx -; X64-WIN-NEXT: notl %edx -; X64-WIN-NEXT: andl %ecx, %edx -; X64-WIN-NEXT: addl %edx, %edx +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB3_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq @@ -126,13 +126,13 @@ define i16 @test5(i16 %a, i16 %b) nounwind { ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB4_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: xorl %edx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: andl %edx, %ecx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: testw %cx, %cx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: notl %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: testw %dx, %dx +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: jne .LBB4_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -144,13 +144,13 @@ define i16 @test5(i16 %a, i16 %b) nounwind { ; X64-LIN-NEXT: .p2align 4 ; X64-LIN-NEXT: .LBB4_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: movl %esi, %ecx -; X64-LIN-NEXT: xorl %ecx, %eax -; X64-LIN-NEXT: movl %eax, %esi -; X64-LIN-NEXT: notl %esi -; X64-LIN-NEXT: andl %ecx, %esi -; X64-LIN-NEXT: addl %esi, %esi -; X64-LIN-NEXT: testw %si, %si +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: testw %cx, %cx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB4_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: # kill: def $ax killed $ax killed $eax @@ -163,13 +163,13 @@ define i16 @test5(i16 %a, i16 %b) nounwind { ; X64-WIN-NEXT: .p2align 4 ; X64-WIN-NEXT: .LBB4_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: movl %edx, %ecx -; X64-WIN-NEXT: xorl %ecx, %eax -; X64-WIN-NEXT: movl %eax, %edx -; X64-WIN-NEXT: notl %edx -; X64-WIN-NEXT: andl %ecx, %edx -; X64-WIN-NEXT: addl %edx, %edx -; X64-WIN-NEXT: testw %dx, %dx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: testw %cx, %cx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB4_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: # kill: def $ax killed $ax killed $eax @@ -197,12 +197,12 @@ define i8 @test6(i8 %a, i8 %b) nounwind { ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB5_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ecx, %edx ; X86-NEXT: xorb %cl, %al -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: notb %cl -; X86-NEXT: andb %dl, %cl -; X86-NEXT: addb %cl, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: notb %dl +; X86-NEXT: andb %cl, %dl +; X86-NEXT: addb %dl, %dl +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: jne .LBB5_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: retl @@ -213,12 +213,12 @@ define i8 @test6(i8 %a, i8 %b) nounwind { ; X64-LIN-NEXT: .p2align 4 ; X64-LIN-NEXT: .LBB5_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: movl %esi, %ecx ; X64-LIN-NEXT: xorb %sil, %al -; X64-LIN-NEXT: movl %eax, %esi -; X64-LIN-NEXT: notb %sil -; X64-LIN-NEXT: andb %cl, %sil -; X64-LIN-NEXT: addb %sil, %sil +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notb %cl +; X64-LIN-NEXT: andb %sil, %cl +; X64-LIN-NEXT: addb %cl, %cl +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB5_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: # kill: def $al killed $al killed $eax @@ -230,12 +230,12 @@ define i8 @test6(i8 %a, i8 %b) nounwind { ; X64-WIN-NEXT: .p2align 4 ; X64-WIN-NEXT: .LBB5_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: movl %edx, %ecx ; X64-WIN-NEXT: xorb %dl, %al -; X64-WIN-NEXT: movl %eax, %edx -; X64-WIN-NEXT: notb %dl -; X64-WIN-NEXT: andb %cl, %dl -; X64-WIN-NEXT: addb %dl, %dl +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notb %cl +; X64-WIN-NEXT: andb %dl, %cl +; X64-WIN-NEXT: addb %cl, %cl +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB5_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq @@ -262,12 +262,12 @@ define i32 @test7(i32 %a, i32 %b) nounwind { ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB6_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ecx, %edx ; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE -; X86-NEXT: andl %edx, %ecx -; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: xorl $2147483646, %edx # imm = 0x7FFFFFFE +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: jne .LBB6_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: retl @@ -278,12 +278,12 @@ define i32 @test7(i32 %a, i32 %b) nounwind { ; X64-LIN-NEXT: .p2align 4 ; X64-LIN-NEXT: .LBB6_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: movl %esi, %ecx ; X64-LIN-NEXT: xorl %esi, %eax -; X64-LIN-NEXT: movl %eax, %esi -; X64-LIN-NEXT: xorl $2147483646, %esi # imm = 0x7FFFFFFE -; X64-LIN-NEXT: andl %ecx, %esi -; X64-LIN-NEXT: addl %esi, %esi +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB6_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: retq @@ -294,12 +294,12 @@ define i32 @test7(i32 %a, i32 %b) nounwind { ; X64-WIN-NEXT: .p2align 4 ; X64-WIN-NEXT: .LBB6_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: movl %edx, %ecx ; X64-WIN-NEXT: xorl %edx, %eax -; X64-WIN-NEXT: movl %eax, %edx -; X64-WIN-NEXT: xorl $2147483646, %edx # imm = 0x7FFFFFFE -; X64-WIN-NEXT: andl %ecx, %edx -; X64-WIN-NEXT: addl %edx, %edx +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB6_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq |
