diff options
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/O0-pipeline.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/aix-base-pointer.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll | 111 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll | 79 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/common-chain.ll | 313 |
6 files changed, 349 insertions, 161 deletions
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll index f1c9da078a16..70b421f8c0c5 100644 --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -25,7 +25,6 @@ ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG -; CHECK-NEXT: Expand vector predication intrinsics ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index be0fbf3abcda..f4f492782eb6 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -62,7 +62,6 @@ ; CHECK-NEXT: Constant Hoisting ; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Partially inline calls to library functions -; CHECK-NEXT: Expand vector predication intrinsics ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll index ab222d770360..5e66e5ec2763 100644 --- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll @@ -6,6 +6,7 @@ ; Use an overaligned buffer to force base-pointer usage. Test verifies: ; - base pointer register (r30) is saved/defined/restored. +; - frame pointer register (r31) is saved/defined/restored. ; - stack frame is allocated with correct alignment. ; - Address of %AlignedBuffer is calculated based off offset from the stack ; pointer. @@ -25,7 +26,9 @@ declare void @callee(ptr) ; 32BIT: subfic 0, 0, -224 ; 32BIT: stwux 1, 1, 0 ; 32BIT: addi 3, 1, 64 +; 32BIT: stw 31, -12(30) ; 32BIT: bl .callee +; 32BIT: lwz 31, -12(30) ; 32BIT: mr 1, 30 ; 32BIT: lwz 30, -16(1) @@ -36,6 +39,8 @@ declare void @callee(ptr) ; 64BIT: subfic 0, 0, -288 ; 64BIT: stdux 1, 1, 0 ; 64BIT: addi 3, 1, 128 +; 64BIT: std 31, -16(30) ; 64BIT: bl .callee +; 64BIT: ld 31, -16(30) ; 64BIT: mr 1, 30 ; 64BIT: ld 30, -24(1) diff --git a/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll new file mode 100644 index 000000000000..cc5d6bee3c97 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX32 + +define dso_local i64 @cdtbcd_test(i64 noundef %ll) { +; CHECK-LABEL: cdtbcd_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cdtbcd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: cdtbcd_test: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: cdtbcd r4, r4 +; CHECK-AIX32-NEXT: blr +entry: + %conv = trunc i64 %ll to i32 + %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %conv) + %conv1 = zext i32 %0 to i64 + ret i64 %conv1 +} + +define dso_local zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) { +; CHECK-LABEL: cdtbcd_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cdtbcd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: cdtbcd_test_ui: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: cdtbcd r3, r3 +; CHECK-AIX32-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %ui) + ret i32 %0 +} + +define dso_local i64 @cbcdtd_test(i64 noundef %ll) { +; CHECK-LABEL: cbcdtd_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cbcdtd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: cbcdtd_test: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: cbcdtd r4, r4 +; CHECK-AIX32-NEXT: blr +entry: + %conv = trunc i64 %ll to i32 + %0 = tail call i32@llvm.ppc.cbcdtd(i32 %conv) + %conv1 = zext i32 %0 to i64 + ret i64 %conv1 +} + +define dso_local zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) { +; CHECK-LABEL: cbcdtd_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cbcdtd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: cbcdtd_test_ui: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: cbcdtd r3, r3 +; CHECK-AIX32-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.cbcdtd(i32 %ui) + ret i32 %0 +} + +define dso_local i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) { +; CHECK-LABEL: addg6s_test: +; CHECK: bb.0: # %entry +; CHECK-NEXT: addg6s r3, r3, r4 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: addg6s_test: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: li r3, 0 +; CHECK-AIX32-NEXT: addg6s r4, r4, r6 +; CHECK-AIX32-NEXT: blr +entry: + %conv = trunc i64 %ll to i32 + %conv1 = trunc i64 %ll2 to i32 + %0 = tail call i32 @llvm.ppc.addg6s(i32 %conv, i32 %conv1) + %conv2 = zext i32 %0 to i64 + ret i64 %conv2 +} + +define dso_local zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) { +; CHECK-LABEL: addg6s_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addg6s r3, r3, r4 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; CHECK-AIX32-LABEL: addg6s_test_ui: +; CHECK-AIX32: # %bb.0: # %entry +; CHECK-AIX32-NEXT: addg6s r3, r3, r4 +; CHECK-AIX32-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.addg6s(i32 %ui, i32 %ui2) + ret i32 %0 +} + +declare i32 @llvm.ppc.cdtbcd(i32) +declare i32 @llvm.ppc.cbcdtd(i32) +declare i32 @llvm.ppc.addg6s(i32, i32) diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll new file mode 100644 index 000000000000..d188f6014f0c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s + +define i64 @cdtbcd_test(i64 noundef %ll) { +; CHECK-LABEL: cdtbcd_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cdtbcd r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %ll) + ret i64 %0 +} + +define zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) { +; CHECK-LABEL: cdtbcd_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cdtbcd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %conv = zext i32 %ui to i64 + %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %conv) + %conv1 = trunc i64 %0 to i32 + ret i32 %conv1 +} + +define i64 @cbcdtd_test(i64 noundef %ll) { +; CHECK-LABEL: cbcdtd_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cbcdtd r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %ll) + ret i64 %0 +} + +define zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) { +; CHECK-LABEL: cbcdtd_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cbcdtd r3, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %conv = zext i32 %ui to i64 + %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %conv) + %conv1 = trunc i64 %0 to i32 + ret i32 %conv1 +} + +define i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) { +; CHECK-LABEL: addg6s_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addg6s r3, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.addg6sd(i64 %ll, i64 %ll2) + ret i64 %0 +} + +define zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) { +; CHECK-LABEL: addg6s_test_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addg6s r3, r3, r4 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %conv = zext i32 %ui to i64 + %conv1 = zext i32 %ui2 to i64 + %0 = tail call i64 @llvm.ppc.addg6sd(i64 %conv, i64 %conv1) + %conv2 = trunc i64 %0 to i32 + ret i32 %conv2 +} + +declare i64 @llvm.ppc.cdtbcdd(i64) +declare i64 @llvm.ppc.cbcdtdd(i64) +declare i64 @llvm.ppc.addg6sd(i64, i64) diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index ccf0e4520f46..b71a360d1be1 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill ; CHECK-NEXT: ble cr0, .LBB7_7 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r4, r6, 2 -; CHECK-NEXT: li r6, 1 -; CHECK-NEXT: mr r0, r10 -; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill -; CHECK-NEXT: cmpdi r4, 1 -; CHECK-NEXT: iselgt r4, r4, r6 -; CHECK-NEXT: addi r7, r4, -1 -; CHECK-NEXT: clrldi r6, r4, 63 -; CHECK-NEXT: cmpldi r7, 3 +; CHECK-NEXT: sldi r6, r6, 2 +; CHECK-NEXT: li r7, 1 +; CHECK-NEXT: mr r30, r10 +; CHECK-NEXT: cmpdi r6, 1 +; CHECK-NEXT: iselgt r7, r6, r7 +; CHECK-NEXT: addi r8, r7, -1 +; CHECK-NEXT: clrldi r6, r7, 63 +; CHECK-NEXT: cmpldi r8, 3 ; CHECK-NEXT: blt cr0, .LBB7_4 ; CHECK-NEXT: # %bb.2: # %for.body.preheader.new -; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: rldicl r7, r4, 62, 2 -; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r11, r0, r30 -; CHECK-NEXT: add r4, r0, r0 -; CHECK-NEXT: mulli r23, r0, 24 -; CHECK-NEXT: add r14, r0, r8 -; CHECK-NEXT: sldi r12, r0, 5 -; CHECK-NEXT: add r31, r0, r9 -; CHECK-NEXT: sldi r9, r9, 3 -; CHECK-NEXT: sldi r18, r0, 4 -; CHECK-NEXT: sldi r8, r8, 3 -; CHECK-NEXT: add r10, r4, r4 -; CHECK-NEXT: sldi r4, r30, 3 -; CHECK-NEXT: sldi r11, r11, 3 -; CHECK-NEXT: add r26, r12, r9 -; CHECK-NEXT: add r16, r18, r9 -; CHECK-NEXT: add r29, r12, r8 -; CHECK-NEXT: add r19, r18, r8 -; CHECK-NEXT: add r30, r12, r4 -; CHECK-NEXT: mr r20, r4 -; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill -; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r15, r5, r11 -; CHECK-NEXT: sldi r11, r14, 3 -; CHECK-NEXT: add r29, r5, r29 -; CHECK-NEXT: add r28, r3, r26 -; CHECK-NEXT: add r19, r5, r19 -; CHECK-NEXT: add r21, r23, r9 -; CHECK-NEXT: add r24, r23, r8 -; CHECK-NEXT: add r14, r5, r11 -; CHECK-NEXT: sldi r11, r31, 3 -; CHECK-NEXT: add r25, r23, r20 -; CHECK-NEXT: add r20, r18, r20 -; CHECK-NEXT: add r30, r5, r30 -; CHECK-NEXT: add r18, r3, r16 -; CHECK-NEXT: add r24, r5, r24 -; CHECK-NEXT: add r23, r3, r21 -; CHECK-NEXT: add r27, r4, r26 -; CHECK-NEXT: add r22, r4, r21 -; CHECK-NEXT: add r17, r4, r16 -; CHECK-NEXT: add r2, r4, r11 -; CHECK-NEXT: rldicl r4, r7, 2, 1 -; CHECK-NEXT: sub r7, r8, r9 -; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload +; CHECK-NEXT: mulli r24, r30, 24 +; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: rldicl r0, r7, 62, 2 +; CHECK-NEXT: sldi r11, r30, 5 +; CHECK-NEXT: sldi r19, r30, 4 +; CHECK-NEXT: sldi r7, r14, 3 +; CHECK-NEXT: add r14, r30, r14 +; CHECK-NEXT: sldi r10, r16, 3 +; CHECK-NEXT: sldi r12, r15, 3 +; CHECK-NEXT: add r16, r30, r16 +; CHECK-NEXT: add r15, r30, r15 +; CHECK-NEXT: add r27, r11, r7 +; CHECK-NEXT: add r22, r24, r7 +; CHECK-NEXT: add r17, r19, r7 +; CHECK-NEXT: sldi r2, r14, 3 +; CHECK-NEXT: add r26, r24, r10 +; CHECK-NEXT: add r25, r24, r12 +; CHECK-NEXT: add r21, r19, r10 +; CHECK-NEXT: add r20, r19, r12 +; CHECK-NEXT: add r8, r11, r10 +; CHECK-NEXT: sldi r16, r16, 3 +; CHECK-NEXT: add r29, r5, r27 +; CHECK-NEXT: add r28, r4, r27 +; CHECK-NEXT: add r27, r3, r27 +; CHECK-NEXT: add r24, r5, r22 +; CHECK-NEXT: add r23, r4, r22 +; CHECK-NEXT: add r22, r3, r22 +; CHECK-NEXT: add r19, r5, r17 +; CHECK-NEXT: add r18, r4, r17 +; CHECK-NEXT: add r17, r3, r17 +; CHECK-NEXT: add r14, r5, r2 +; CHECK-NEXT: add r31, r4, r2 +; CHECK-NEXT: add r2, r3, r2 +; CHECK-NEXT: add r9, r5, r8 +; CHECK-NEXT: add r8, r11, r12 ; CHECK-NEXT: add r26, r5, r26 ; CHECK-NEXT: add r25, r5, r25 ; CHECK-NEXT: add r21, r5, r21 ; CHECK-NEXT: add r20, r5, r20 ; CHECK-NEXT: add r16, r5, r16 -; CHECK-NEXT: add r31, r5, r11 -; CHECK-NEXT: add r11, r3, r11 -; CHECK-NEXT: addi r4, r4, -4 -; CHECK-NEXT: rldicl r4, r4, 62, 2 -; CHECK-NEXT: sub r8, r8, r9 -; CHECK-NEXT: li r9, 0 -; CHECK-NEXT: addi r4, r4, 1 -; CHECK-NEXT: mtctr r4 +; CHECK-NEXT: add r8, r5, r8 +; CHECK-NEXT: rldicl r3, r0, 2, 1 +; CHECK-NEXT: addi r3, r3, -4 +; CHECK-NEXT: sub r0, r12, r7 +; CHECK-NEXT: sub r12, r10, r7 +; CHECK-NEXT: li r7, 0 +; CHECK-NEXT: mr r10, r30 +; CHECK-NEXT: sldi r15, r15, 3 +; CHECK-NEXT: add r15, r5, r15 +; CHECK-NEXT: rldicl r3, r3, 62, 2 +; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lfd f0, 0(r11) -; CHECK-NEXT: lfd f1, 0(r2) -; CHECK-NEXT: add r0, r0, r10 -; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfd f0, 0(r2) ; CHECK-NEXT: lfd f1, 0(r31) +; CHECK-NEXT: add r3, r10, r30 +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfd f1, 0(r14) +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: add r10, r3, r30 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfd f0, 0(r31) -; CHECK-NEXT: add r31, r31, r12 -; CHECK-NEXT: lfdx f0, r11, r7 -; CHECK-NEXT: lfdx f1, r2, r7 +; CHECK-NEXT: stfd f0, 0(r14) +; CHECK-NEXT: add r14, r14, r11 +; CHECK-NEXT: lfdx f0, r2, r0 +; CHECK-NEXT: lfdx f1, r31, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r14, r9 +; CHECK-NEXT: lfdx f1, r15, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r14, r9 -; CHECK-NEXT: lfdx f0, r11, r8 -; CHECK-NEXT: lfdx f1, r2, r8 -; CHECK-NEXT: add r11, r11, r12 -; CHECK-NEXT: add r2, r2, r12 +; CHECK-NEXT: stfdx f0, r15, r7 +; CHECK-NEXT: lfdx f0, r2, r12 +; CHECK-NEXT: lfdx f1, r31, r12 +; CHECK-NEXT: add r2, r2, r11 +; CHECK-NEXT: add r31, r31, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r15, r9 +; CHECK-NEXT: lfdx f1, r16, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r15, r9 -; CHECK-NEXT: lfd f0, 0(r18) -; CHECK-NEXT: lfd f1, 0(r17) +; CHECK-NEXT: stfdx f0, r16, r7 +; CHECK-NEXT: lfd f0, 0(r17) +; CHECK-NEXT: lfd f1, 0(r18) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r16, r9 +; CHECK-NEXT: lfdx f1, r19, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r16, r9 -; CHECK-NEXT: lfdx f0, r18, r7 -; CHECK-NEXT: lfdx f1, r17, r7 +; CHECK-NEXT: stfdx f0, r19, r7 +; CHECK-NEXT: lfdx f0, r17, r0 +; CHECK-NEXT: lfdx f1, r18, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r19, r9 +; CHECK-NEXT: lfdx f1, r20, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r19, r9 -; CHECK-NEXT: lfdx f0, r18, r8 -; CHECK-NEXT: lfdx f1, r17, r8 -; CHECK-NEXT: add r18, r18, r12 -; CHECK-NEXT: add r17, r17, r12 +; CHECK-NEXT: stfdx f0, r20, r7 +; CHECK-NEXT: lfdx f0, r17, r12 +; CHECK-NEXT: lfdx f1, r18, r12 +; CHECK-NEXT: add r17, r17, r11 +; CHECK-NEXT: add r18, r18, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r20, r9 +; CHECK-NEXT: lfdx f1, r21, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r20, r9 -; CHECK-NEXT: lfd f0, 0(r23) -; CHECK-NEXT: lfd f1, 0(r22) +; CHECK-NEXT: stfdx f0, r21, r7 +; CHECK-NEXT: lfd f0, 0(r22) +; CHECK-NEXT: lfd f1, 0(r23) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r21, r9 +; CHECK-NEXT: lfdx f1, r24, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r21, r9 -; CHECK-NEXT: lfdx f0, r23, r7 -; CHECK-NEXT: lfdx f1, r22, r7 +; CHECK-NEXT: stfdx f0, r24, r7 +; CHECK-NEXT: lfdx f0, r22, r0 +; CHECK-NEXT: lfdx f1, r23, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r24, r9 +; CHECK-NEXT: lfdx f1, r25, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r24, r9 -; CHECK-NEXT: lfdx f0, r23, r8 -; CHECK-NEXT: lfdx f1, r22, r8 -; CHECK-NEXT: add r23, r23, r12 -; CHECK-NEXT: add r22, r22, r12 +; CHECK-NEXT: stfdx f0, r25, r7 +; CHECK-NEXT: lfdx f0, r22, r12 +; CHECK-NEXT: lfdx f1, r23, r12 +; CHECK-NEXT: add r22, r22, r11 +; CHECK-NEXT: add r23, r23, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r25, r9 +; CHECK-NEXT: lfdx f1, r26, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r25, r9 -; CHECK-NEXT: lfd f0, 0(r28) -; CHECK-NEXT: lfd f1, 0(r27) +; CHECK-NEXT: stfdx f0, r26, r7 +; CHECK-NEXT: lfd f0, 0(r27) +; CHECK-NEXT: lfd f1, 0(r28) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r26, r9 +; CHECK-NEXT: lfdx f1, r29, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r26, r9 -; CHECK-NEXT: lfdx f0, r28, r7 -; CHECK-NEXT: lfdx f1, r27, r7 +; CHECK-NEXT: stfdx f0, r29, r7 +; CHECK-NEXT: lfdx f0, r27, r0 +; CHECK-NEXT: lfdx f1, r28, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r29, r9 +; CHECK-NEXT: lfdx f1, r8, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r29, r9 -; CHECK-NEXT: lfdx f0, r28, r8 -; CHECK-NEXT: lfdx f1, r27, r8 -; CHECK-NEXT: add r28, r28, r12 -; CHECK-NEXT: add r27, r27, r12 +; CHECK-NEXT: stfdx f0, r8, r7 +; CHECK-NEXT: lfdx f0, r27, r12 +; CHECK-NEXT: lfdx f1, r28, r12 +; CHECK-NEXT: add r27, r27, r11 +; CHECK-NEXT: add r28, r28, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r30, r9 +; CHECK-NEXT: lfdx f1, r9, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r30, r9 -; CHECK-NEXT: add r9, r9, r12 +; CHECK-NEXT: stfdx f0, r9, r7 +; CHECK-NEXT: add r7, r7, r11 ; CHECK-NEXT: bdnz .LBB7_3 ; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: beq cr0, .LBB7_7 ; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader -; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: sldi r7, r7, 3 -; CHECK-NEXT: add r4, r0, r4 -; CHECK-NEXT: sldi r4, r4, 3 -; CHECK-NEXT: add r3, r5, r4 -; CHECK-NEXT: add r8, r29, r4 -; CHECK-NEXT: add r9, r30, r4 -; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r4, r0, r4 -; CHECK-NEXT: sldi r4, r4, 3 -; CHECK-NEXT: add r10, r5, r4 -; CHECK-NEXT: add r11, r29, r4 -; CHECK-NEXT: add r12, r30, r4 -; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r4, r0, r4 -; CHECK-NEXT: sldi r0, r4, 3 -; CHECK-NEXT: add r5, r5, r0 -; CHECK-NEXT: add r4, r29, r0 -; CHECK-NEXT: add r30, r30, r0 -; CHECK-NEXT: li r0, 0 +; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: sldi r8, r30, 3 +; CHECK-NEXT: add r3, r10, r3 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: add r7, r5, r3 +; CHECK-NEXT: add r9, r4, r3 +; CHECK-NEXT: add r11, r0, r3 +; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r3, r10, r3 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: add r12, r5, r3 +; CHECK-NEXT: add r30, r4, r3 +; CHECK-NEXT: add r29, r0, r3 +; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r3, r10, r3 +; CHECK-NEXT: li r10, 0 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: add r5, r5, r3 +; CHECK-NEXT: add r4, r4, r3 +; CHECK-NEXT: add r3, r0, r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_6: # %for.body.epil ; CHECK-NEXT: # -; CHECK-NEXT: lfdx f0, r30, r0 -; CHECK-NEXT: lfdx f1, r4, r0 +; CHECK-NEXT: lfdx f0, r3, r10 +; CHECK-NEXT: lfdx f1, r4, r10 ; CHECK-NEXT: addi r6, r6, -1 ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r5) ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r5) -; CHECK-NEXT: add r5, r5, r7 -; CHECK-NEXT: lfdx f0, r12, r0 -; CHECK-NEXT: lfdx f1, r11, r0 +; CHECK-NEXT: add r5, r5, r8 +; CHECK-NEXT: lfdx f0, r29, r10 +; CHECK-NEXT: lfdx f1, r30, r10 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r10, r0 +; CHECK-NEXT: lfdx f1, r12, r10 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r10, r0 -; CHECK-NEXT: lfdx f0, r9, r0 -; CHECK-NEXT: lfdx f1, r8, r0 +; CHECK-NEXT: stfdx f0, r12, r10 +; CHECK-NEXT: lfdx f0, r11, r10 +; CHECK-NEXT: lfdx f1, r9, r10 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r3, r0 +; CHECK-NEXT: lfdx f1, r7, r10 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r3, r0 -; CHECK-NEXT: add r0, r0, r7 +; CHECK-NEXT: stfdx f0, r7, r10 +; CHECK-NEXT: add r10, r10, r8 ; CHECK-NEXT: bne cr0, .LBB7_6 ; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup ; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload |
