summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r--llvm/test/CodeGen/PowerPC/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/PowerPC/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/PowerPC/aix-base-pointer.ll5
-rw-r--r--llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll111
-rw-r--r--llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll79
-rw-r--r--llvm/test/CodeGen/PowerPC/common-chain.ll313
6 files changed, 349 insertions, 161 deletions
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
index f1c9da078a16..70b421f8c0c5 100644
--- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
@@ -25,7 +25,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index be0fbf3abcda..f4f492782eb6 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -62,7 +62,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
index ab222d770360..5e66e5ec2763 100644
--- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
@@ -6,6 +6,7 @@
; Use an overaligned buffer to force base-pointer usage. Test verifies:
; - base pointer register (r30) is saved/defined/restored.
+; - frame pointer register (r31) is saved/defined/restored.
; - stack frame is allocated with correct alignment.
; - Address of %AlignedBuffer is calculated based off offset from the stack
; pointer.
@@ -25,7 +26,9 @@ declare void @callee(ptr)
; 32BIT: subfic 0, 0, -224
; 32BIT: stwux 1, 1, 0
; 32BIT: addi 3, 1, 64
+; 32BIT: stw 31, -12(30)
; 32BIT: bl .callee
+; 32BIT: lwz 31, -12(30)
; 32BIT: mr 1, 30
; 32BIT: lwz 30, -16(1)
@@ -36,6 +39,8 @@ declare void @callee(ptr)
; 64BIT: subfic 0, 0, -288
; 64BIT: stdux 1, 1, 0
; 64BIT: addi 3, 1, 128
+; 64BIT: std 31, -16(30)
; 64BIT: bl .callee
+; 64BIT: ld 31, -16(30)
; 64BIT: mr 1, 30
; 64BIT: ld 30, -24(1)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll
new file mode 100644
index 000000000000..cc5d6bee3c97
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX32
+
+define dso_local i64 @cdtbcd_test(i64 noundef %ll) {
+; CHECK-LABEL: cdtbcd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cdtbcd_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: cdtbcd r4, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %conv)
+ %conv1 = zext i32 %0 to i64
+ ret i64 %conv1
+}
+
+define dso_local zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cdtbcd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cdtbcd_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: cdtbcd r3, r3
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %ui)
+ ret i32 %0
+}
+
+define dso_local i64 @cbcdtd_test(i64 noundef %ll) {
+; CHECK-LABEL: cbcdtd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cbcdtd_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: cbcdtd r4, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %0 = tail call i32@llvm.ppc.cbcdtd(i32 %conv)
+ %conv1 = zext i32 %0 to i64
+ ret i64 %conv1
+}
+
+define dso_local zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cbcdtd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cbcdtd_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: cbcdtd r3, r3
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.cbcdtd(i32 %ui)
+ ret i32 %0
+}
+
+define dso_local i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) {
+; CHECK-LABEL: addg6s_test:
+; CHECK: bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: addg6s_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: addg6s r4, r4, r6
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %conv1 = trunc i64 %ll2 to i32
+ %0 = tail call i32 @llvm.ppc.addg6s(i32 %conv, i32 %conv1)
+ %conv2 = zext i32 %0 to i64
+ ret i64 %conv2
+}
+
+define dso_local zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) {
+; CHECK-LABEL: addg6s_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: addg6s_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: addg6s r3, r3, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.addg6s(i32 %ui, i32 %ui2)
+ ret i32 %0
+}
+
+declare i32 @llvm.ppc.cdtbcd(i32)
+declare i32 @llvm.ppc.cbcdtd(i32)
+declare i32 @llvm.ppc.addg6s(i32, i32)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll
new file mode 100644
index 000000000000..d188f6014f0c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @cdtbcd_test(i64 noundef %ll) {
+; CHECK-LABEL: cdtbcd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %ll)
+ ret i64 %0
+}
+
+define zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cdtbcd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %conv)
+ %conv1 = trunc i64 %0 to i32
+ ret i32 %conv1
+}
+
+define i64 @cbcdtd_test(i64 noundef %ll) {
+; CHECK-LABEL: cbcdtd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %ll)
+ ret i64 %0
+}
+
+define zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cbcdtd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %conv)
+ %conv1 = trunc i64 %0 to i32
+ ret i32 %conv1
+}
+
+define i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) {
+; CHECK-LABEL: addg6s_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.addg6sd(i64 %ll, i64 %ll2)
+ ret i64 %0
+}
+
+define zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) {
+; CHECK-LABEL: addg6s_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %conv1 = zext i32 %ui2 to i64
+ %0 = tail call i64 @llvm.ppc.addg6sd(i64 %conv, i64 %conv1)
+ %conv2 = trunc i64 %0 to i32
+ ret i32 %conv2
+}
+
+declare i64 @llvm.ppc.cdtbcdd(i64)
+declare i64 @llvm.ppc.cbcdtdd(i64)
+declare i64 @llvm.ppc.addg6sd(i64, i64)
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ccf0e4520f46..b71a360d1be1 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: sldi r4, r6, 2
-; CHECK-NEXT: li r6, 1
-; CHECK-NEXT: mr r0, r10
-; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: cmpdi r4, 1
-; CHECK-NEXT: iselgt r4, r4, r6
-; CHECK-NEXT: addi r7, r4, -1
-; CHECK-NEXT: clrldi r6, r4, 63
-; CHECK-NEXT: cmpldi r7, 3
+; CHECK-NEXT: sldi r6, r6, 2
+; CHECK-NEXT: li r7, 1
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: cmpdi r6, 1
+; CHECK-NEXT: iselgt r7, r6, r7
+; CHECK-NEXT: addi r8, r7, -1
+; CHECK-NEXT: clrldi r6, r7, 63
+; CHECK-NEXT: cmpldi r8, 3
; CHECK-NEXT: blt cr0, .LBB7_4
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: rldicl r7, r4, 62, 2
-; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r11, r0, r30
-; CHECK-NEXT: add r4, r0, r0
-; CHECK-NEXT: mulli r23, r0, 24
-; CHECK-NEXT: add r14, r0, r8
-; CHECK-NEXT: sldi r12, r0, 5
-; CHECK-NEXT: add r31, r0, r9
-; CHECK-NEXT: sldi r9, r9, 3
-; CHECK-NEXT: sldi r18, r0, 4
-; CHECK-NEXT: sldi r8, r8, 3
-; CHECK-NEXT: add r10, r4, r4
-; CHECK-NEXT: sldi r4, r30, 3
-; CHECK-NEXT: sldi r11, r11, 3
-; CHECK-NEXT: add r26, r12, r9
-; CHECK-NEXT: add r16, r18, r9
-; CHECK-NEXT: add r29, r12, r8
-; CHECK-NEXT: add r19, r18, r8
-; CHECK-NEXT: add r30, r12, r4
-; CHECK-NEXT: mr r20, r4
-; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r15, r5, r11
-; CHECK-NEXT: sldi r11, r14, 3
-; CHECK-NEXT: add r29, r5, r29
-; CHECK-NEXT: add r28, r3, r26
-; CHECK-NEXT: add r19, r5, r19
-; CHECK-NEXT: add r21, r23, r9
-; CHECK-NEXT: add r24, r23, r8
-; CHECK-NEXT: add r14, r5, r11
-; CHECK-NEXT: sldi r11, r31, 3
-; CHECK-NEXT: add r25, r23, r20
-; CHECK-NEXT: add r20, r18, r20
-; CHECK-NEXT: add r30, r5, r30
-; CHECK-NEXT: add r18, r3, r16
-; CHECK-NEXT: add r24, r5, r24
-; CHECK-NEXT: add r23, r3, r21
-; CHECK-NEXT: add r27, r4, r26
-; CHECK-NEXT: add r22, r4, r21
-; CHECK-NEXT: add r17, r4, r16
-; CHECK-NEXT: add r2, r4, r11
-; CHECK-NEXT: rldicl r4, r7, 2, 1
-; CHECK-NEXT: sub r7, r8, r9
-; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mulli r24, r30, 24
+; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: rldicl r0, r7, 62, 2
+; CHECK-NEXT: sldi r11, r30, 5
+; CHECK-NEXT: sldi r19, r30, 4
+; CHECK-NEXT: sldi r7, r14, 3
+; CHECK-NEXT: add r14, r30, r14
+; CHECK-NEXT: sldi r10, r16, 3
+; CHECK-NEXT: sldi r12, r15, 3
+; CHECK-NEXT: add r16, r30, r16
+; CHECK-NEXT: add r15, r30, r15
+; CHECK-NEXT: add r27, r11, r7
+; CHECK-NEXT: add r22, r24, r7
+; CHECK-NEXT: add r17, r19, r7
+; CHECK-NEXT: sldi r2, r14, 3
+; CHECK-NEXT: add r26, r24, r10
+; CHECK-NEXT: add r25, r24, r12
+; CHECK-NEXT: add r21, r19, r10
+; CHECK-NEXT: add r20, r19, r12
+; CHECK-NEXT: add r8, r11, r10
+; CHECK-NEXT: sldi r16, r16, 3
+; CHECK-NEXT: add r29, r5, r27
+; CHECK-NEXT: add r28, r4, r27
+; CHECK-NEXT: add r27, r3, r27
+; CHECK-NEXT: add r24, r5, r22
+; CHECK-NEXT: add r23, r4, r22
+; CHECK-NEXT: add r22, r3, r22
+; CHECK-NEXT: add r19, r5, r17
+; CHECK-NEXT: add r18, r4, r17
+; CHECK-NEXT: add r17, r3, r17
+; CHECK-NEXT: add r14, r5, r2
+; CHECK-NEXT: add r31, r4, r2
+; CHECK-NEXT: add r2, r3, r2
+; CHECK-NEXT: add r9, r5, r8
+; CHECK-NEXT: add r8, r11, r12
; CHECK-NEXT: add r26, r5, r26
; CHECK-NEXT: add r25, r5, r25
; CHECK-NEXT: add r21, r5, r21
; CHECK-NEXT: add r20, r5, r20
; CHECK-NEXT: add r16, r5, r16
-; CHECK-NEXT: add r31, r5, r11
-; CHECK-NEXT: add r11, r3, r11
-; CHECK-NEXT: addi r4, r4, -4
-; CHECK-NEXT: rldicl r4, r4, 62, 2
-; CHECK-NEXT: sub r8, r8, r9
-; CHECK-NEXT: li r9, 0
-; CHECK-NEXT: addi r4, r4, 1
-; CHECK-NEXT: mtctr r4
+; CHECK-NEXT: add r8, r5, r8
+; CHECK-NEXT: rldicl r3, r0, 2, 1
+; CHECK-NEXT: addi r3, r3, -4
+; CHECK-NEXT: sub r0, r12, r7
+; CHECK-NEXT: sub r12, r10, r7
+; CHECK-NEXT: li r7, 0
+; CHECK-NEXT: mr r10, r30
+; CHECK-NEXT: sldi r15, r15, 3
+; CHECK-NEXT: add r15, r5, r15
+; CHECK-NEXT: rldicl r3, r3, 62, 2
+; CHECK-NEXT: addi r3, r3, 1
+; CHECK-NEXT: mtctr r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lfd f0, 0(r11)
-; CHECK-NEXT: lfd f1, 0(r2)
-; CHECK-NEXT: add r0, r0, r10
-; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f0, 0(r2)
; CHECK-NEXT: lfd f1, 0(r31)
+; CHECK-NEXT: add r3, r10, r30
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f1, 0(r14)
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: add r10, r3, r30
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfd f0, 0(r31)
-; CHECK-NEXT: add r31, r31, r12
-; CHECK-NEXT: lfdx f0, r11, r7
-; CHECK-NEXT: lfdx f1, r2, r7
+; CHECK-NEXT: stfd f0, 0(r14)
+; CHECK-NEXT: add r14, r14, r11
+; CHECK-NEXT: lfdx f0, r2, r0
+; CHECK-NEXT: lfdx f1, r31, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r14, r9
+; CHECK-NEXT: lfdx f1, r15, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r14, r9
-; CHECK-NEXT: lfdx f0, r11, r8
-; CHECK-NEXT: lfdx f1, r2, r8
-; CHECK-NEXT: add r11, r11, r12
-; CHECK-NEXT: add r2, r2, r12
+; CHECK-NEXT: stfdx f0, r15, r7
+; CHECK-NEXT: lfdx f0, r2, r12
+; CHECK-NEXT: lfdx f1, r31, r12
+; CHECK-NEXT: add r2, r2, r11
+; CHECK-NEXT: add r31, r31, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r15, r9
+; CHECK-NEXT: lfdx f1, r16, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r15, r9
-; CHECK-NEXT: lfd f0, 0(r18)
-; CHECK-NEXT: lfd f1, 0(r17)
+; CHECK-NEXT: stfdx f0, r16, r7
+; CHECK-NEXT: lfd f0, 0(r17)
+; CHECK-NEXT: lfd f1, 0(r18)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r16, r9
+; CHECK-NEXT: lfdx f1, r19, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r16, r9
-; CHECK-NEXT: lfdx f0, r18, r7
-; CHECK-NEXT: lfdx f1, r17, r7
+; CHECK-NEXT: stfdx f0, r19, r7
+; CHECK-NEXT: lfdx f0, r17, r0
+; CHECK-NEXT: lfdx f1, r18, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r19, r9
+; CHECK-NEXT: lfdx f1, r20, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r19, r9
-; CHECK-NEXT: lfdx f0, r18, r8
-; CHECK-NEXT: lfdx f1, r17, r8
-; CHECK-NEXT: add r18, r18, r12
-; CHECK-NEXT: add r17, r17, r12
+; CHECK-NEXT: stfdx f0, r20, r7
+; CHECK-NEXT: lfdx f0, r17, r12
+; CHECK-NEXT: lfdx f1, r18, r12
+; CHECK-NEXT: add r17, r17, r11
+; CHECK-NEXT: add r18, r18, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r20, r9
+; CHECK-NEXT: lfdx f1, r21, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r20, r9
-; CHECK-NEXT: lfd f0, 0(r23)
-; CHECK-NEXT: lfd f1, 0(r22)
+; CHECK-NEXT: stfdx f0, r21, r7
+; CHECK-NEXT: lfd f0, 0(r22)
+; CHECK-NEXT: lfd f1, 0(r23)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r21, r9
+; CHECK-NEXT: lfdx f1, r24, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r21, r9
-; CHECK-NEXT: lfdx f0, r23, r7
-; CHECK-NEXT: lfdx f1, r22, r7
+; CHECK-NEXT: stfdx f0, r24, r7
+; CHECK-NEXT: lfdx f0, r22, r0
+; CHECK-NEXT: lfdx f1, r23, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r24, r9
+; CHECK-NEXT: lfdx f1, r25, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r24, r9
-; CHECK-NEXT: lfdx f0, r23, r8
-; CHECK-NEXT: lfdx f1, r22, r8
-; CHECK-NEXT: add r23, r23, r12
-; CHECK-NEXT: add r22, r22, r12
+; CHECK-NEXT: stfdx f0, r25, r7
+; CHECK-NEXT: lfdx f0, r22, r12
+; CHECK-NEXT: lfdx f1, r23, r12
+; CHECK-NEXT: add r22, r22, r11
+; CHECK-NEXT: add r23, r23, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r25, r9
+; CHECK-NEXT: lfdx f1, r26, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r25, r9
-; CHECK-NEXT: lfd f0, 0(r28)
-; CHECK-NEXT: lfd f1, 0(r27)
+; CHECK-NEXT: stfdx f0, r26, r7
+; CHECK-NEXT: lfd f0, 0(r27)
+; CHECK-NEXT: lfd f1, 0(r28)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r26, r9
+; CHECK-NEXT: lfdx f1, r29, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r26, r9
-; CHECK-NEXT: lfdx f0, r28, r7
-; CHECK-NEXT: lfdx f1, r27, r7
+; CHECK-NEXT: stfdx f0, r29, r7
+; CHECK-NEXT: lfdx f0, r27, r0
+; CHECK-NEXT: lfdx f1, r28, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r29, r9
+; CHECK-NEXT: lfdx f1, r8, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r29, r9
-; CHECK-NEXT: lfdx f0, r28, r8
-; CHECK-NEXT: lfdx f1, r27, r8
-; CHECK-NEXT: add r28, r28, r12
-; CHECK-NEXT: add r27, r27, r12
+; CHECK-NEXT: stfdx f0, r8, r7
+; CHECK-NEXT: lfdx f0, r27, r12
+; CHECK-NEXT: lfdx f1, r28, r12
+; CHECK-NEXT: add r27, r27, r11
+; CHECK-NEXT: add r28, r28, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r30, r9
+; CHECK-NEXT: lfdx f1, r9, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r30, r9
-; CHECK-NEXT: add r9, r9, r12
+; CHECK-NEXT: stfdx f0, r9, r7
+; CHECK-NEXT: add r7, r7, r11
; CHECK-NEXT: bdnz .LBB7_3
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: sldi r7, r7, 3
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r3, r5, r4
-; CHECK-NEXT: add r8, r29, r4
-; CHECK-NEXT: add r9, r30, r4
-; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r10, r5, r4
-; CHECK-NEXT: add r11, r29, r4
-; CHECK-NEXT: add r12, r30, r4
-; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r0, r4, 3
-; CHECK-NEXT: add r5, r5, r0
-; CHECK-NEXT: add r4, r29, r0
-; CHECK-NEXT: add r30, r30, r0
-; CHECK-NEXT: li r0, 0
+; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: sldi r8, r30, 3
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r7, r5, r3
+; CHECK-NEXT: add r9, r4, r3
+; CHECK-NEXT: add r11, r0, r3
+; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r12, r5, r3
+; CHECK-NEXT: add r30, r4, r3
+; CHECK-NEXT: add r29, r0, r3
+; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: li r10, 0
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r5, r5, r3
+; CHECK-NEXT: add r4, r4, r3
+; CHECK-NEXT: add r3, r0, r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
-; CHECK-NEXT: lfdx f0, r30, r0
-; CHECK-NEXT: lfdx f1, r4, r0
+; CHECK-NEXT: lfdx f0, r3, r10
+; CHECK-NEXT: lfdx f1, r4, r10
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
-; CHECK-NEXT: add r5, r5, r7
-; CHECK-NEXT: lfdx f0, r12, r0
-; CHECK-NEXT: lfdx f1, r11, r0
+; CHECK-NEXT: add r5, r5, r8
+; CHECK-NEXT: lfdx f0, r29, r10
+; CHECK-NEXT: lfdx f1, r30, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r10, r0
+; CHECK-NEXT: lfdx f1, r12, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r10, r0
-; CHECK-NEXT: lfdx f0, r9, r0
-; CHECK-NEXT: lfdx f1, r8, r0
+; CHECK-NEXT: stfdx f0, r12, r10
+; CHECK-NEXT: lfdx f0, r11, r10
+; CHECK-NEXT: lfdx f1, r9, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r3, r0
+; CHECK-NEXT: lfdx f1, r7, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r3, r0
-; CHECK-NEXT: add r0, r0, r7
+; CHECK-NEXT: stfdx f0, r7, r10
+; CHECK-NEXT: add r10, r10, r8
; CHECK-NEXT: bne cr0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload