diff options
Diffstat (limited to 'llvm/test/CodeGen/ARM')
| -rw-r--r-- | llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 31 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/GlobalISel/fpenv.ll | 85 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/apple-version-min.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/cmse-harden-call-returned-values.ll | 552 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll | 368 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/constant-island-movwt.mir | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/constant-islands-split-IT.mir | 104 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/fp-intrinsics.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/fp16-promote.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/frem-power2.ll | 52 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/ldexp.ll | 58 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vdiv_combine.ll | 111 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vector-store.ll | 859 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vfloatintrinsics.ll | 21 |
17 files changed, 1871 insertions, 479 deletions
diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index e14e59808624..b6ebeaae5eb6 100644 --- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -56,6 +56,37 @@ L.entry: declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly +define void @test_tan(ptr %X) nounwind { + +; CHECK-LABEL: test_tan: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK: movt [[reg0]], :upper16:{{.*}} +; CHECK: vld1.64 + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: vst1.64 + +L.entry: + %0 = load <4 x float>, ptr @A, align 16 + %1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0) + store <4 x float> %1, ptr %X, align 16 + ret void +} + +declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly + define void @test_exp(ptr %X) nounwind { ; CHECK-LABEL: test_exp: diff --git a/llvm/test/CodeGen/ARM/GlobalISel/fpenv.ll b/llvm/test/CodeGen/ARM/GlobalISel/fpenv.ll index f5aea62d4be7..f8dba64e7a01 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/fpenv.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/fpenv.ll @@ -1,10 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 -global-isel=1 --verify-machineinstrs %s -o - | FileCheck %s -declare i32 @llvm.get.fpenv.i32() -declare void @llvm.set.fpenv.i32(i32) -declare void @llvm.reset.fpenv() - define i32 @func_get_fpenv() { ; CHECK-LABEL: func_get_fpenv: ; CHECK: @ %bb.0: @ %entry @@ -88,5 +84,86 @@ entry: ret void } + +define i32 @get_fpmode_soft() #0 { +; CHECK-LABEL: get_fpmode_soft: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: add r4, sp, #4 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl fegetmode +; CHECK-NEXT: ldr r0, [r4] +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r4, lr} +; CHECK-NEXT: mov pc, lr +entry: + %fpenv = call i32 @llvm.get.fpmode.i32() + ret i32 %fpenv +} + +define i32 @get_fpmode() nounwind { +; CHECK-LABEL: get_fpmode: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: mov pc, lr +entry: + %fpenv = call i32 @llvm.get.fpmode.i32() + ret i32 %fpenv +} + +define void @set_fpmode_soft(i32 %fpmode) #0 { +; CHECK-LABEL: set_fpmode_soft: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl fesetmode +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.set.fpmode.i32(i32 %fpmode) + ret void +} + +define void @set_fpmode(i32 %fpmode) nounwind { +; CHECK-LABEL: set_fpmode: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r1, fpscr +; CHECK-NEXT: mov r2, #159 +; CHECK-NEXT: orr r2, r2, #-134217728 +; CHECK-NEXT: and r1, r1, r2 +; CHECK-NEXT: mvn r2, #159 +; CHECK-NEXT: sub r2, r2, #-134217728 +; CHECK-NEXT: and r0, r0, r2 +; CHECK-NEXT: orr r0, r1, r0 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.set.fpmode.i32(i32 %fpmode) + ret void +} + +define void @reset_fpmode_soft() #0 { +; CHECK-LABEL: reset_fpmode_soft: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mvn r0, #0 +; CHECK-NEXT: bl fesetmode +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.reset.fpmode() + ret void +} + attributes #0 = { nounwind "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/ARM/apple-version-min.ll b/llvm/test/CodeGen/ARM/apple-version-min.ll new file mode 100644 index 000000000000..180c19e5e362 --- /dev/null +++ b/llvm/test/CodeGen/ARM/apple-version-min.ll @@ -0,0 +1,12 @@ +; Test emitting version_min directives. + +; Let's not split this into separate ARM/AArch64 parts. +; REQUIRES: aarch64-registered-target + +; RUN: llc %s -filetype=asm -o - --mtriple arm64-apple-tvos9.0.0 | FileCheck %s --check-prefix=TVOS +; RUN: llc %s -filetype=asm -o - --mtriple thumbv7s-apple-ios7.0.0 | FileCheck %s --check-prefix=IOS +; RUN: llc %s -filetype=asm -o - --mtriple thumbv7k-apple-watchos2.0.0 | FileCheck %s --check-prefix=WATCHOS + +; TVOS: .tvos_version_min 9, 0 +; IOS: .ios_version_min 7, 0 +; WATCHOS: .watchos_version_min 2, 0 diff --git a/llvm/test/CodeGen/ARM/cmse-harden-call-returned-values.ll b/llvm/test/CodeGen/ARM/cmse-harden-call-returned-values.ll new file mode 100644 index 000000000000..58eef443c25e --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-harden-call-returned-values.ll @@ -0,0 +1,552 @@ +; RUN: llc %s -mtriple=thumbv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-LE +; RUN: llc %s -mtriple=thumbebv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-BE +; RUN: llc %s -mtriple=thumbv8.1m.main -o - | FileCheck %s --check-prefixes V81M-COMMON,V81M-LE +; RUN: llc %s -mtriple=thumbebv8.1m.main -o - | FileCheck %s --check-prefixes V81M-COMMON,V81M-BE + +@get_idx = hidden local_unnamed_addr global ptr null, align 4 +@arr = hidden local_unnamed_addr global [256 x i32] zeroinitializer, align 4 + +define i32 @access_i16() { +; V8M-COMMON-LABEL: access_i16: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sxth r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_i16: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sxth r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call signext i16 %0() "cmse_nonsecure_call" + %idxprom = sext i16 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_u16() { +; V8M-COMMON-LABEL: access_u16: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: uxth r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_u16: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: uxth r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call zeroext i16 %0() "cmse_nonsecure_call" + %idxprom = zext i16 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_i8() { +; V8M-COMMON-LABEL: access_i8: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sxtb r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_i8: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sxtb r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call signext i8 %0() "cmse_nonsecure_call" + %idxprom = sext i8 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_u8() { +; V8M-COMMON-LABEL: access_u8: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: uxtb r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_u8: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: uxtb r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call zeroext i8 %0() "cmse_nonsecure_call" + %idxprom = zext i8 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_i1() { +; V8M-COMMON-LABEL: access_i1: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_i1: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call zeroext i1 %0() "cmse_nonsecure_call" + %idxprom = zext i1 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_i5() { +; V8M-COMMON-LABEL: access_i5: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sbfx r0, r0, #0, #5 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_i5: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sbfx r0, r0, #0, #5 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call signext i5 %0() "cmse_nonsecure_call" + %idxprom = sext i5 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_u5() { +; V8M-COMMON-LABEL: access_u5: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: movw r0, :lower16:get_idx +; V8M-COMMON-NEXT: movt r0, :upper16:get_idx +; V8M-COMMON-NEXT: ldr r0, [r0] +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: and r0, r0, #31 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_u5: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: movw r0, :lower16:get_idx +; V81M-COMMON-NEXT: movt r0, :upper16:get_idx +; V81M-COMMON-NEXT: ldr r0, [r0] +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: and r0, r0, #31 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %0 = load ptr, ptr @get_idx, align 4 + %call = tail call zeroext i5 %0() "cmse_nonsecure_call" + %idxprom = zext i5 %call to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + ret i32 %1 +} + +define i32 @access_i33(ptr %f) { +; V8M-COMMON-LABEL: access_i33: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: rsb.w r0, r0, #0 +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_i33: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-LE-NEXT: and r0, r1, #1 +; V81M-BE-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: rsb.w r0, r0, #0 +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %call = tail call i33 %f() "cmse_nonsecure_call" + %shr = ashr i33 %call, 32 + %conv = trunc nsw i33 %shr to i32 + ret i32 %conv +} + +define i32 @access_u33(ptr %f) { +; V8M-COMMON-LABEL: access_u33: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: push {r7, lr} +; V8M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-COMMON-NEXT: bic r0, r0, #1 +; V8M-COMMON-NEXT: sub sp, #136 +; V8M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V8M-COMMON-NEXT: mov r1, r0 +; V8M-COMMON-NEXT: mov r2, r0 +; V8M-COMMON-NEXT: mov r3, r0 +; V8M-COMMON-NEXT: mov r4, r0 +; V8M-COMMON-NEXT: mov r5, r0 +; V8M-COMMON-NEXT: mov r6, r0 +; V8M-COMMON-NEXT: mov r7, r0 +; V8M-COMMON-NEXT: mov r8, r0 +; V8M-COMMON-NEXT: mov r9, r0 +; V8M-COMMON-NEXT: mov r10, r0 +; V8M-COMMON-NEXT: mov r11, r0 +; V8M-COMMON-NEXT: mov r12, r0 +; V8M-COMMON-NEXT: msr apsr_nzcvq, r0 +; V8M-COMMON-NEXT: blxns r0 +; V8M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V8M-COMMON-NEXT: add sp, #136 +; V8M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: pop {r7, pc} +; +; V81M-COMMON-LABEL: access_u33: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: push {r7, lr} +; V81M-COMMON-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-COMMON-NEXT: bic r0, r0, #1 +; V81M-COMMON-NEXT: sub sp, #136 +; V81M-COMMON-NEXT: vlstm sp, {d0 - d15} +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; V81M-COMMON-NEXT: blxns r0 +; V81M-COMMON-NEXT: vlldm sp, {d0 - d15} +; V81M-COMMON-NEXT: add sp, #136 +; V81M-COMMON-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; V81M-LE-NEXT: and r0, r1, #1 +; V81M-BE-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: pop {r7, pc} +entry: + %call = tail call i33 %f() "cmse_nonsecure_call" + %shr = lshr i33 %call, 32 + %conv = trunc nuw nsw i33 %shr to i32 + ret i32 %conv +} diff --git a/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll b/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll new file mode 100644 index 000000000000..c66ab00566dd --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll @@ -0,0 +1,368 @@ +; RUN: llc %s -mtriple=thumbv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-LE +; RUN: llc %s -mtriple=thumbebv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-BE +; RUN: llc %s -mtriple=thumbv8.1m.main -o - | FileCheck %s --check-prefixes V81M-COMMON,V81M-LE +; RUN: llc %s -mtriple=thumbebv8.1m.main -o - | FileCheck %s --check-prefixes V81M-COMMON,V81M-BE + +@arr = hidden local_unnamed_addr global [256 x i32] zeroinitializer, align 4 + +define i32 @access_i16(i16 signext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i16: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sxth r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i16: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sxth r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = sext i16 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_u16(i16 zeroext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_u16: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: uxth r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_u16: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: uxth r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = zext i16 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_i8(i8 signext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i8: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sxtb r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i8: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sxtb r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = sext i8 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_u8(i8 zeroext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_u8: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: uxtb r0, r0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_u8: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: uxtb r0, r0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = zext i8 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_i1(i1 signext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i1: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: rsbs r0, r0, #0 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i1: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: rsbs r0, r0, #0 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = zext i1 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_i5(i5 signext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i5: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: sbfx r0, r0, #0, #5 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i5: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: sbfx r0, r0, #0, #5 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = sext i5 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_u5(i5 zeroext %idx) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_u5: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: movw r1, :lower16:arr +; V8M-COMMON-NEXT: and r0, r0, #31 +; V8M-COMMON-NEXT: movt r1, :upper16:arr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_u5: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: movw r1, :lower16:arr +; V81M-COMMON-NEXT: and r0, r0, #31 +; V81M-COMMON-NEXT: movt r1, :upper16:arr +; V81M-COMMON-NEXT: ldr.w r0, [r1, r0, lsl #2] +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %idxprom = zext i5 %idx to i32 + %arrayidx = getelementptr inbounds [256 x i32], ptr @arr, i32 0, i32 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @access_i33(i33 %arg) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i33: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: rsbs r0, r0, #0 +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i33: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-LE-NEXT: and r0, r1, #1 +; V81M-BE-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: rsbs r0, r0, #0 +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %shr = ashr i33 %arg, 32 + %conv = trunc nsw i33 %shr to i32 + ret i32 %conv +} + +define i32 @access_u33(i33 %arg) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_u33: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_u33: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-LE-NEXT: and r0, r1, #1 +; V81M-BE-NEXT: and r0, r0, #1 +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %shr = lshr i33 %arg, 32 + %conv = trunc nuw nsw i33 %shr to i32 + ret i32 %conv +} + +define i32 @access_i65(ptr byval(i65) %0) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_i65: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: sub sp, #16 +; V8M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-LE-NEXT: ldrb.w r0, [sp, #8] +; V8M-LE-NEXT: and r0, r0, #1 +; V8M-LE-NEXT: rsbs r0, r0, #0 +; V8M-BE-NEXT: movs r1, #0 +; V8M-BE-NEXT: sub.w r0, r1, r0, lsr #24 +; V8M-COMMON-NEXT: add sp, #16 +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_i65: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: sub sp, #16 +; V81M-COMMON-NEXT: add sp, #4 +; V81M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-LE-NEXT: ldrb.w r0, [sp, #8] +; V81M-LE-NEXT: and r0, r0, #1 +; V81M-LE-NEXT: rsbs r0, r0, #0 +; V81M-BE-NEXT: movs r1, #0 +; V81M-BE-NEXT: sub.w r0, r1, r0, lsr #24 +; V81M-COMMON-NEXT: sub sp, #4 +; V81M-COMMON-NEXT: add sp, #16 +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %arg = load i65, ptr %0, align 8 + %shr = ashr i65 %arg, 64 + %conv = trunc nsw i65 %shr to i32 + ret i32 %conv +} + +define i32 @access_u65(ptr byval(i65) %0) "cmse_nonsecure_entry" { +; V8M-COMMON-LABEL: access_u65: +; V8M-COMMON: @ %bb.0: @ %entry +; V8M-COMMON-NEXT: sub sp, #16 +; V8M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-LE-NEXT: ldrb.w r0, [sp, #8] +; V8M-BE-NEXT: lsrs r0, r0, #24 +; V8M-COMMON-NEXT: add sp, #16 +; V8M-COMMON-NEXT: mov r1, lr +; V8M-COMMON-NEXT: mov r2, lr +; V8M-COMMON-NEXT: mov r3, lr +; V8M-COMMON-NEXT: mov r12, lr +; V8M-COMMON-NEXT: msr apsr_nzcvq, lr +; V8M-COMMON-NEXT: bxns lr +; +; V81M-COMMON-LABEL: access_u65: +; V81M-COMMON: @ %bb.0: @ %entry +; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-COMMON-NEXT: sub sp, #16 +; V81M-COMMON-NEXT: add sp, #4 +; V81M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-LE-NEXT: ldrb.w r0, [sp, #8] +; V81M-BE-NEXT: lsrs r0, r0, #24 +; V81M-COMMON-NEXT: sub sp, #4 +; V81M-COMMON-NEXT: add sp, #16 +; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 +; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-COMMON-NEXT: bxns lr +entry: + %arg = load i65, ptr %0, align 8 + %shr = lshr i65 %arg, 64 + %conv = trunc nuw nsw i65 %shr to i32 + ret i32 %conv +} diff --git a/llvm/test/CodeGen/ARM/constant-island-movwt.mir b/llvm/test/CodeGen/ARM/constant-island-movwt.mir index 7d21a4e4875c..7b3e59eca847 100644 --- a/llvm/test/CodeGen/ARM/constant-island-movwt.mir +++ b/llvm/test/CodeGen/ARM/constant-island-movwt.mir @@ -898,13 +898,14 @@ body: | # CHECK-NEXT: CONSTPOOL_ENTRY 1, %const.0, 4 # CHECK-NEXT: {{^ $}} # CHECK-NEXT: bb.2.entry (align 2): -# CHECK-NEXT: liveins: $d13, $s27, $r10, $r9, $r8, $s26, $d12, $s25, $s24, -# CHECK-SAME: $d15, $s30, $s31, $d14, $s28, $s29, $lr, $r0, $d21, -# CHECK-SAME: $r3, $q10, $d20, $d17, $r2, $d25, $q11, $d22, $d23, -# CHECK-SAME: $r1, $q8, $d16, $s3, $q14, $d28, $d29, $d19, $s17, -# CHECK-SAME: $d8, $s16, $r6, $r7, $r4, $q12, $q9, $d18, $s0, $q15, -# CHECK-SAME: $d30, $d31, $r12, $s1, $d0, $d24, $s2, $d1, $q0, $s6, -# CHECK-SAME: $d3, $d2, $s4, $q1, $s7, $s5, $d9, $s18, $s19, $q4 +# CHECK-NEXT: liveins: $s26, $s27, $r10, $r9, $r8, $d13, $s24, $s25, +# CHECK-SAME: $d12, $d15, $s30, $s31, $d14, $s28, $s29, $lr, +# CHECK-SAME: $d21, $q10, $r7, $r0, $d20, $d17, $r2, $q12, +# CHECK-SAME: $q11, $d22, $d23, $r1, $q8, $d16, $d30, $q14, +# CHECK-SAME: $d28, $d29, $d19, $s17, $r4, $d8, $r6, $r3, +# CHECK-SAME: $s16, $d25, $q9, $d18, $s0, $d31, $s3, $q15, +# CHECK-SAME: $r12, $d0, $s1, $d24, $d1, $s2, $q0, $s5, $d2, +# CHECK-SAME: $q1, $s4, $s7, $d3, $s6, $d9, $s18, $s19, $q4 # CHECK-NEXT: {{^ $}} # CHECK-NEXT: $r5 = t2MOVi16 target-flags(arm-lo16) @.str.84, 14 /* CC::al */, $noreg # CHECK-NEXT: $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.84, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir index 85fd2d610b1e..236cd34f7fba 100644 --- a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir +++ b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir @@ -6,7 +6,6 @@ # --- | - ; ModuleID = '<stdin>' source_filename = "<stdin>" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.main-arm-none-eabi" @@ -69,48 +68,67 @@ machineFunctionInfo: {} body: | ; CHECK-LABEL: name: h ; CHECK: bb.0: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: renamable $d0 = VLDRD %const.3, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) - ; CHECK: dead renamable $r0 = SPACE 40, undef renamable $r0 - ; CHECK: tB %bb.4, 14 /* CC::al */, $noreg - ; CHECK: bb.1 (align 8): - ; CHECK: successors: - ; CHECK: CONSTPOOL_ENTRY 3, %const.0, 8 - ; CHECK: bb.2: - ; CHECK: successors: - ; CHECK: bb.3 (align 8): - ; CHECK: successors: - ; CHECK: CONSTPOOL_ENTRY 5, %const.2, 8 - ; CHECK: bb.4 (align 2): - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: dead renamable $r0 = SPACE 790, undef renamable $r0 - ; CHECK: bb.5: - ; CHECK: successors: %bb.7(0x80000000) - ; CHECK: renamable $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2CMPri $r0, 32, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r0 = SPACE 200, undef renamable $r0 - ; CHECK: t2IT 0, 1, implicit-def $itstate - ; CHECK: renamable $d0 = VLDRD %const.7, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) - ; CHECK: renamable $d1 = VLDRD %const.5, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) - ; CHECK: renamable $d2 = VLDRD %const.6, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) - ; CHECK: $r0 = t2SUBri $r0, 12, 0 /* CC::eq */, $cpsr, $noreg, implicit killed $itstate - ; CHECK: t2B %bb.7, 14 /* CC::al */, $noreg - ; CHECK: bb.6 (align 8): - ; CHECK: successors: - ; CHECK: CONSTPOOL_ENTRY 7, %const.1, 8 - ; CHECK: bb.7 (align 2): - ; CHECK: liveins: $r0, $cpsr, $d0, $s0, $s1, $d1, $s2, $s3, $d2, $s4, $s5 - ; CHECK: t2IT 0, 4, implicit-def $itstate - ; CHECK: $sp = tMOVr $r0, 0 /* CC::eq */, $cpsr, implicit $itstate - ; CHECK: $sp = t2LDMIA_RET $sp, 0 /* CC::eq */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit killed $d0, implicit killed $d1, implicit killed $d2, implicit $sp, implicit killed $itstate - ; CHECK: bb.8 (align 8): - ; CHECK: successors: - ; CHECK: CONSTPOOL_ENTRY 6, %const.0, 8 - ; CHECK: bb.9 (align 2): - ; CHECK: successors: %bb.9(0x80000000) - ; CHECK: dead renamable $r0 = SPACE 4000, undef renamable $r0 - ; CHECK: t2B %bb.9, 14 /* CC::al */, $noreg - ; CHECK: bb.10: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $d0 = VLDRD %const.3, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) + ; CHECK-NEXT: dead renamable $r0 = SPACE 40, undef renamable $r0 + ; CHECK-NEXT: tB %bb.4, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (align 8): + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CONSTPOOL_ENTRY 3, %const.0, 8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3 (align 8): + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CONSTPOOL_ENTRY 5, %const.2, 8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4 (align 2): + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead renamable $r0 = SPACE 790, undef renamable $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: t2CMPri $r0, 32, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: renamable $r0 = SPACE 200, undef renamable $r0 + ; CHECK-NEXT: t2IT 0, 1, implicit-def $itstate + ; CHECK-NEXT: renamable $d0 = VLDRD %const.7, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + ; CHECK-NEXT: renamable $d1 = VLDRD %const.5, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + ; CHECK-NEXT: renamable $d2 = VLDRD %const.6, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + ; CHECK-NEXT: $r0 = t2SUBri $r0, 12, 0 /* CC::eq */, $cpsr, $noreg, implicit killed $itstate + ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6 (align 8): + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CONSTPOOL_ENTRY 7, %const.1, 8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7 (align 2): + ; CHECK-NEXT: liveins: $r0, $cpsr, $d0, $s0, $s1, $d1, $s2, $s3, $d2, $s4, $s5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: t2IT 0, 4, implicit-def $itstate + ; CHECK-NEXT: $sp = tMOVr $r0, 0 /* CC::eq */, $cpsr, implicit $itstate + ; CHECK-NEXT: $sp = t2LDMIA_RET $sp, 0 /* CC::eq */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit killed $d0, implicit killed $d1, implicit killed $d2, implicit $sp, implicit killed $itstate + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8 (align 8): + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CONSTPOOL_ENTRY 6, %const.0, 8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9 (align 2): + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead renamable $r0 = SPACE 4000, undef renamable $r0 + ; CHECK-NEXT: t2B %bb.9, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10: bb.0: successors: %bb.1(0x80000000) diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 64b22a5cc71b..e286eb3226e4 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -139,6 +139,13 @@ define float @cos_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: tan_f32: +; CHECK: bl tanf +define float @tan_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: pow_f32: ; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { @@ -596,6 +603,13 @@ define double @cos_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: tan_f64: +; CHECK: bl tan +define double @tan_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: pow_f64: ; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { @@ -1023,6 +1037,7 @@ declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) @@ -1056,6 +1071,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 7381d517505e..2656cdbb0347 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -281,6 +281,23 @@ define void @test_cos(ptr %p) { ret void } +define void @test_tan(ptr %p) { +; CHECK-LABEL: test_tan: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + define void @test_pow(ptr %p, ptr %q) { ; CHECK-LABEL: test_pow: ; CHECK: .save {r4, lr} @@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a) declare half @llvm.powi.f16.i32(half %a, i32 %b) declare half @llvm.sin.f16(half %a) declare half @llvm.cos.f16(half %a) +declare half @llvm.tan.f16(half %a) declare half @llvm.pow.f16(half %a, half %b) declare half @llvm.exp.f16(half %a) declare half @llvm.exp2.f16(half %a) diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 9c01129ff30d..ae3b8f9920e3 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 declare half @llvm.cos.f16(half %a) #0 +declare half @llvm.tan.f16(half %a) #0 declare half @llvm.pow.f16(half %a, half %b) #0 declare half @llvm.exp.f16(half %a) #0 declare half @llvm.exp2.f16(half %a) #0 @@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 { ret void } +; CHECK-FP16-LABEL: test_tan: +; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: bl tanf +; CHECK-FP16: vcvtb.f16.f32 +; CHECK-LIBCALL-LABEL: test_tan: +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL: bl tanf +; CHECK-LIBCALL: bl __aeabi_f2h +define void @test_tan(ptr %p) #0 { + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + ; CHECK-FP16-LABEL: test_pow: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll index 71c2c09c0105..63ecd9fec788 100644 --- a/llvm/test/CodeGen/ARM/frem-power2.ll +++ b/llvm/test/CodeGen/ARM/frem-power2.ll @@ -14,26 +14,28 @@ define float @frem4(float %x) { ; ; CHECK-FP-LABEL: frem4: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov s2, r0 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 ; CHECK-FP-NEXT: lsrs r0, r0, #31 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r1, s2 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r1, s4 ; CHECK-FP-NEXT: bfi r1, r0, #31, #1 ; CHECK-FP-NEXT: mov r0, r1 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 ; CHECK-M33-NEXT: lsrs r0, r0, #31 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r1, s2 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r1, s4 ; CHECK-M33-NEXT: bfi r1, r0, #31, #1 ; CHECK-M33-NEXT: mov r0, r1 ; CHECK-M33-NEXT: bx lr @@ -53,22 +55,24 @@ define float @frem4_nsz(float %x) { ; ; CHECK-FP-LABEL: frem4_nsz: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov s2, r0 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r0, s2 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4_nsz: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r0, s2 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r0, s4 ; CHECK-M33-NEXT: bx lr entry: %fmod = frem nsz float %x, 4.0 diff --git a/llvm/test/CodeGen/ARM/ldexp.ll b/llvm/test/CodeGen/ARM/ldexp.ll new file mode 100644 index 000000000000..941390ee94c1 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldexp.ll @@ -0,0 +1,58 @@ +; RUN: llc -mtriple=armv7-linux < %s -o - | FileCheck -check-prefix=LINUX %s +; RUN: llc -mtriple=thumbv7-windows-msvc -mattr=+thumb-mode < %s -o - | FileCheck -check-prefix=WINDOWS %s + +define double @testExp(double %val, i32 %a) { +; LINUX: b ldexp{{$}} +; WINDOWS: b.w ldexp{{$}} +entry: + %call = tail call fast double @ldexp(double %val, i32 %a) + ret double %call +} + +declare double @ldexp(double, i32) memory(none) + +define double @testExpIntrinsic(double %val, i32 %a) { +; LINUX: b ldexp{{$}} +; WINDOWS: b.w ldexp{{$}} +entry: + %call = tail call fast double @llvm.ldexp.f64(double %val, i32 %a) + ret double %call +} + +define float @testExpf(float %val, i32 %a) { +; LINUX: b ldexpf +; WINDOWS: b.w ldexpf +entry: + %call = tail call fast float @ldexpf(float %val, i32 %a) + ret float %call +} + +define float @testExpfIntrinsic(float %val, i32 %a) { +; LINUX: b ldexpf +; WINDOWS: bl ldexp{{$}} +entry: + %call = tail call fast float @llvm.ldexp.f32(float %val, i32 %a) + ret float %call +} + +declare float @ldexpf(float, i32) memory(none) + +define fp128 @testExpl(fp128 %val, i32 %a) { +; LINUX: bl ldexpl +; WINDOWS: b.w ldexpl +entry: + %call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a) + ret fp128 %call +} + +declare fp128 @ldexpl(fp128, i32) memory(none) + +define half @testExpf16(half %val, i32 %a) { +; LINUX: bl ldexpf +; WINDOWS: bl ldexp{{$}} +entry: + %0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a) + ret half %0 +} + +declare half @llvm.ldexp.f16.i32(half, i32) memory(none) diff --git a/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll b/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll new file mode 100644 index 000000000000..f50d92b8160d --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=thumbv8.1m.main-unknown-unknown < %s | FileCheck %s + +; Make sure this does not assert during machine outlining. + +declare void @a(...) + +define void @b(i32 %a, i32 %b, i32 %c, ptr %d, ptr %e) minsize { +; CHECK-LABEL: b: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r3, [sp] +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #2 +; CHECK-NEXT: b a + tail call void @a(i32 2, i32 %a, i32 %b, ptr %e) + ret void +} + +define void @c(i32 %a, i32 %b, i32 %c, ptr %d, ptr %e) minsize { +; CHECK-LABEL: c: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r3, [sp] +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: b a + tail call void @a(i32 4, i32 %a, i32 %b, ptr %e) + ret void +} diff --git a/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir index bf08af763357..2387d8b4338e 100644 --- a/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir +++ b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -start-before register-coalescer -mtriple=arm-apple-ios -stop-after machine-scheduler -o - -arm-enable-subreg-liveness -verify-machineinstrs | FileCheck %s +# RUN: llc %s -start-before register-coalescer -mtriple=arm-apple-ios -stop-after machine-scheduler -o - -enable-subreg-liveness -verify-machineinstrs | FileCheck %s # Check that when we merge live-ranges that imply offseting # the definition of a subregister by some other subreg index, diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll index 988844661085..899487f9efb2 100644 --- a/llvm/test/CodeGen/ARM/vdiv_combine.ll +++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll @@ -5,10 +5,7 @@ define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -20,10 +17,7 @@ entry: define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> @@ -56,17 +50,10 @@ entry: define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI3_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d16, d0 +; CHECK-NEXT: vmov.i32 d17, #0x2f000000 +; CHECK-NEXT: vmul.f32 d0, d16, d17 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI3_0: -; CHECK-NEXT: .long 0x50000000 @ float 8.58993459E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000> @@ -77,17 +64,8 @@ entry: define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t5: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI4_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI4_0: -; CHECK-NEXT: .long 0x4f800000 @ float 4.2949673E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000> @@ -98,12 +76,7 @@ entry: define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind { ; CHECK-LABEL: t6: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float> @@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_unsigned_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.u16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = uitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0> @@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_signed_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.s16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = sitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0> @@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl ___floatundisf ; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: vmov s18, r0 -; CHECK-NEXT: vmov.f32 s16, #2.000000e+00 +; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: vmov.i32 d8, #0x3f000000 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl ___floatundisf -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vdiv.f32 s1, s18, s16 -; CHECK-NEXT: vdiv.f32 s0, s2, s16 +; CHECK-NEXT: vmov s18, r0 +; CHECK-NEXT: vmul.f32 d0, d9, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -177,13 +139,13 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: vmov.f64 d8, #2.000000e+00 +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov d16, r0, r1 -; CHECK-NEXT: vdiv.f64 d1, d9, d8 -; CHECK-NEXT: vdiv.f64 d0, d16, d8 +; CHECK-NEXT: vmul.f64 d1, d9, d8 +; CHECK-NEXT: vmul.f64 d0, d16, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind { ; CHECK-LABEL: test7: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.f32 s12, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q4, q0 -; CHECK-NEXT: vcvt.f32.s32 q2, q1 -; CHECK-NEXT: vdiv.f32 s3, s19, s12 -; CHECK-NEXT: vdiv.f32 s7, s11, s12 -; CHECK-NEXT: vdiv.f32 s2, s18, s12 -; CHECK-NEXT: vdiv.f32 s6, s10, s12 -; CHECK-NEXT: vdiv.f32 s1, s17, s12 -; CHECK-NEXT: vdiv.f32 s5, s9, s12 -; CHECK-NEXT: vdiv.f32 s0, s16, s12 -; CHECK-NEXT: vdiv.f32 s4, s8, s12 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 +; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <8 x i32> %in to <8 x float> @@ -220,19 +171,8 @@ entry: define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) { ; CHECK-LABEL: test8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI11_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI11_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %vcvt.i = sitofp <4 x i32> %in to <4 x float> %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef> ret <4 x float> %div.i @@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) { define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) { ; CHECK-LABEL: test_illegal_int_to_fp: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #4.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI12_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI12_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %conv = sitofp <3 x i32> %in to <3 x float> %res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0> ret <3 x float> %res diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll index a8a1031637af..a0a801d2b6d2 100644 --- a/llvm/test/CodeGen/ARM/vector-store.ll +++ b/llvm/test/CodeGen/ARM/vector-store.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s | FileCheck %s - -target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -target triple = "thumbv7-none-eabi" +; RUN: llc < %s -mtriple=thumbv7-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-LE +; RUN: llc < %s -mtriple=thumbebv7-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-BE define void @store_v8i8(ptr %ptr, <8 x i8> %val) { ; CHECK-LABEL: store_v8i8: @@ -11,24 +9,33 @@ define void @store_v8i8(ptr %ptr, <8 x i8> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i8> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <8 x i8> %val, ptr %A, align 1 + ret void } define void @store_v8i8_update(ptr %ptr, <8 x i8> %val) { -; CHECK-LABEL: store_v8i8_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i8> %val, ptr %A, align 1 - %inc = getelementptr <8 x i8>, ptr %A, i38 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v8i8_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i8_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i8> %val, ptr %A, align 1 + %inc = getelementptr <8 x i8>, ptr %A, i38 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4i16(ptr %ptr, <4 x i16> %val) { @@ -38,24 +45,33 @@ define void @store_v4i16(ptr %ptr, <4 x i16> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i16> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <4 x i16> %val, ptr %A, align 1 + ret void } define void @store_v4i16_update(ptr %ptr, <4 x i16> %val) { -; CHECK-LABEL: store_v4i16_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i16> %val, ptr %A, align 1 - %inc = getelementptr <4 x i16>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4i16_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i16_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i16> %val, ptr %A, align 1 + %inc = getelementptr <4 x i16>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i32(ptr %ptr, <2 x i32> %val) { @@ -65,24 +81,33 @@ define void @store_v2i32(ptr %ptr, <2 x i32> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i32> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <2 x i32> %val, ptr %A, align 1 + ret void } define void @store_v2i32_update(ptr %ptr, <2 x i32> %val) { -; CHECK-LABEL: store_v2i32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i32> %val, ptr %A, align 1 - %inc = getelementptr <2 x i32>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i32> %val, ptr %A, align 1 + %inc = getelementptr <2 x i32>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2f32(ptr %ptr, <2 x float> %val) { @@ -92,24 +117,33 @@ define void @store_v2f32(ptr %ptr, <2 x float> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x float> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <2 x float> %val, ptr %A, align 1 + ret void } define void @store_v2f32_update(ptr %ptr, <2 x float> %val) { -; CHECK-LABEL: store_v2f32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x float> %val, ptr %A, align 1 - %inc = getelementptr <2 x float>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2f32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2f32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x float> %val, ptr %A, align 1 + %inc = getelementptr <2 x float>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v1i64(ptr %ptr, <1 x i64> %val) { @@ -119,279 +153,458 @@ define void @store_v1i64(ptr %ptr, <1 x i64> %val) { ; CHECK-NEXT: str r3, [r0, #4] ; CHECK-NEXT: str r2, [r0] ; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <1 x i64> %val, ptr %A, align 1 - ret void + %A = load ptr, ptr %ptr + store <1 x i64> %val, ptr %A, align 1 + ret void } define void @store_v1i64_update(ptr %ptr, <1 x i64> %val) { -; CHECK-LABEL: store_v1i64_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <1 x i64> %val, ptr %A, align 1 - %inc = getelementptr <1 x i64>, ptr %A, i31 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v1i64_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v1i64_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 d16, d16 +; CHECK-BE-NEXT: vst1.8 {d16}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <1 x i64> %val, ptr %A, align 1 + %inc = getelementptr <1 x i64>, ptr %A, i31 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v16i8(ptr %ptr, <16 x i8> %val) { -; CHECK-LABEL: store_v16i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <16 x i8> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v16i8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v16i8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <16 x i8> %val, ptr %A, align 1 + ret void } define void @store_v16i8_update(ptr %ptr, <16 x i8> %val) { -; CHECK-LABEL: store_v16i8_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <16 x i8> %val, ptr %A, align 1 - %inc = getelementptr <16 x i8>, ptr %A, i316 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v16i8_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v16i8_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <16 x i8> %val, ptr %A, align 1 + %inc = getelementptr <16 x i8>, ptr %A, i316 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v8i16(ptr %ptr, <8 x i16> %val) { -; CHECK-LABEL: store_v8i16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i16> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v8i16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i16> %val, ptr %A, align 1 + ret void } define void @store_v8i16_update(ptr %ptr, <8 x i16> %val) { -; CHECK-LABEL: store_v8i16_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <8 x i16> %val, ptr %A, align 1 - %inc = getelementptr <8 x i16>, ptr %A, i38 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v8i16_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v8i16_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <8 x i16> %val, ptr %A, align 1 + %inc = getelementptr <8 x i16>, ptr %A, i38 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4i32(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: store_v4i32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i32> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v4i32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i32> %val, ptr %A, align 1 + ret void } define void @store_v4i32_update(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: store_v4i32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x i32> %val, ptr %A, align 1 - %inc = getelementptr <4 x i32>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4i32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4i32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x i32> %val, ptr %A, align 1 + %inc = getelementptr <4 x i32>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v4f32(ptr %ptr, <4 x float> %val) { -; CHECK-LABEL: store_v4f32: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x float> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v4f32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4f32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x float> %val, ptr %A, align 1 + ret void } define void @store_v4f32_update(ptr %ptr, <4 x float> %val) { -; CHECK-LABEL: store_v4f32_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <4 x float> %val, ptr %A, align 1 - %inc = getelementptr <4 x float>, ptr %A, i34 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v4f32_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v4f32_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <4 x float> %val, ptr %A, align 1 + %inc = getelementptr <4 x float>, ptr %A, i34 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vst1.8 {d16, d17}, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 1 - ret void +; CHECK-LE-LABEL: store_v2i64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 1 + ret void } define void @store_v2i64_update(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.8 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 1 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.8 q8, q8 +; CHECK-BE-NEXT: vst1.8 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 1 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned2(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned2: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.16 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 2 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.16 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.16 q8, q8 +; CHECK-BE-NEXT: vst1.16 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 2 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned4(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned4: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.32 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 4 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.32 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 4 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned8(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.64 {d16, d17}, [r1]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 8 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r1]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r1]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 8 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @store_v2i64_update_aligned16(ptr %ptr, <2 x i64> %val) { -; CHECK-LABEL: store_v2i64_update_aligned16: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - store <2 x i64> %val, ptr %A, align 16 - %inc = getelementptr <2 x i64>, ptr %A, i32 1 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: store_v2i64_update_aligned16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r1:128]! +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: store_v2i64_update_aligned16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r1:128]! +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + store <2 x i64> %val, ptr %A, align 16 + %inc = getelementptr <2 x i64>, ptr %A, i32 1 + store ptr %inc, ptr %ptr + ret void } define void @truncstore_v4i32tov4i8(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: truncstore_v4i32tov4i8: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: vmovn.i32 d16, q8 -; CHECK-NEXT: vuzp.8 d16, d17 -; CHECK-NEXT: vst1.32 {d16[0]}, [r0:32] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - %trunc = trunc <4 x i32> %val to <4 x i8> - store <4 x i8> %trunc, ptr %A, align 4 - ret void +; CHECK-LE-LABEL: truncstore_v4i32tov4i8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmovn.i32 d16, q8 +; CHECK-LE-NEXT: vuzp.8 d16, d17 +; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r0:32] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: truncstore_v4i32tov4i8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmovn.i32 d16, q8 +; CHECK-BE-NEXT: vrev16.8 d16, d16 +; CHECK-BE-NEXT: vuzp.8 d16, d17 +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev32.8 d16, d17 +; CHECK-BE-NEXT: vst1.32 {d16[0]}, [r0:32] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + %trunc = trunc <4 x i32> %val to <4 x i8> + store <4 x i8> %trunc, ptr %A, align 4 + ret void } define void @truncstore_v4i32tov4i8_fake_update(ptr %ptr, <4 x i32> %val) { -; CHECK-LABEL: truncstore_v4i32tov4i8_fake_update: -; CHECK: @ %bb.0: -; CHECK-NEXT: vldr d17, [sp] -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: ldr r1, [r0] -; CHECK-NEXT: movs r2, #16 -; CHECK-NEXT: vmovn.i32 d16, q8 -; CHECK-NEXT: vuzp.8 d16, d17 -; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32], r2 -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: bx lr - %A = load ptr, ptr %ptr - %trunc = trunc <4 x i32> %val to <4 x i8> - store <4 x i8> %trunc, ptr %A, align 4 - %inc = getelementptr <4 x i8>, ptr %A, i38 4 - store ptr %inc, ptr %ptr - ret void +; CHECK-LE-LABEL: truncstore_v4i32tov4i8_fake_update: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d17, [sp] +; CHECK-LE-NEXT: vmov d16, r2, r3 +; CHECK-LE-NEXT: ldr r1, [r0] +; CHECK-LE-NEXT: movs r2, #16 +; CHECK-LE-NEXT: vmovn.i32 d16, q8 +; CHECK-LE-NEXT: vuzp.8 d16, d17 +; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r1:32], r2 +; CHECK-LE-NEXT: str r1, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: truncstore_v4i32tov4i8_fake_update: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d17, [sp] +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: movs r2, #16 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmovn.i32 d16, q8 +; CHECK-BE-NEXT: vrev16.8 d16, d16 +; CHECK-BE-NEXT: vuzp.8 d16, d17 +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: vrev32.8 d16, d17 +; CHECK-BE-NEXT: vst1.32 {d16[0]}, [r1:32], r2 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr + %A = load ptr, ptr %ptr + %trunc = trunc <4 x i32> %val to <4 x i8> + store <4 x i8> %trunc, ptr %A, align 4 + %inc = getelementptr <4 x i8>, ptr %A, i38 4 + store ptr %inc, ptr %ptr + ret void } define ptr @test_vst1_1reg(ptr %ptr.in, ptr %ptr.out) { -; CHECK-LABEL: test_vst1_1reg: -; CHECK: @ %bb.0: -; CHECK-NEXT: vld1.64 {d16, d17}, [r0] -; CHECK-NEXT: movs r0, #32 -; CHECK-NEXT: vst1.32 {d16, d17}, [r1], r0 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: test_vst1_1reg: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-LE-NEXT: movs r0, #32 +; CHECK-LE-NEXT: vst1.32 {d16, d17}, [r1], r0 +; CHECK-LE-NEXT: mov r0, r1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: test_vst1_1reg: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-BE-NEXT: movs r0, #32 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r1], r0 +; CHECK-BE-NEXT: mov r0, r1 +; CHECK-BE-NEXT: bx lr %val = load <4 x i32>, ptr %ptr.in store <4 x i32> %val, ptr %ptr.out %next = getelementptr <4 x i32>, ptr %ptr.out, i32 2 @@ -400,37 +613,65 @@ define ptr @test_vst1_1reg(ptr %ptr.in, ptr %ptr.out) { ; PR56970 define void @v3i8store(ptr %p) { -; CHECK-LABEL: v3i8store: -; CHECK: @ %bb.0: -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov.i32 d16, #0xff -; CHECK-NEXT: mov r1, sp -; CHECK-NEXT: vmov.i32 d17, #0x0 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vand d16, d17, d16 -; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32] -; CHECK-NEXT: vld1.32 {d16[0]}, [r1:32] -; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: strb r2, [r0, #2] -; CHECK-NEXT: vmov.32 r1, d16[0] -; CHECK-NEXT: strh r1, [r0] -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: v3i8store: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: mov r2, sp +; CHECK-LE-NEXT: str r1, [sp] +; CHECK-LE-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-LE-NEXT: strb r1, [r0, #2] +; CHECK-LE-NEXT: vmovl.u16 q8, d16 +; CHECK-LE-NEXT: vmov.32 r2, d16[0] +; CHECK-LE-NEXT: strh r2, [r0] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: v3i8store: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: mov r2, sp +; CHECK-BE-NEXT: str r1, [sp] +; CHECK-BE-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-BE-NEXT: strb r1, [r0, #2] +; CHECK-BE-NEXT: vrev32.16 d16, d16 +; CHECK-BE-NEXT: vmovl.u16 q8, d16 +; CHECK-BE-NEXT: vmov.32 r2, d16[0] +; CHECK-BE-NEXT: strh r2, [r0] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr store <3 x i8> zeroinitializer, ptr %p, align 4 ret void } define void @v3i64shuffle(ptr %p, <3 x i64> %a) { -; CHECK-LABEL: v3i64shuffle: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i32 q8, #0x0 -; CHECK-NEXT: ldrd r12, r1, [sp, #8] -; CHECK-NEXT: vmov d18, r2, r3 -; CHECK-NEXT: vorr d19, d16, d16 -; CHECK-NEXT: str r1, [r0, #20] -; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! -; CHECK-NEXT: str.w r12, [r0] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: v3i64shuffle: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i32 q8, #0x0 +; CHECK-LE-NEXT: ldrd r12, r1, [sp, #8] +; CHECK-LE-NEXT: vmov d18, r2, r3 +; CHECK-LE-NEXT: vorr d19, d16, d16 +; CHECK-LE-NEXT: str r1, [r0, #20] +; CHECK-LE-NEXT: vst1.32 {d18, d19}, [r0]! +; CHECK-LE-NEXT: str.w r12, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: v3i64shuffle: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [sp, #8] +; CHECK-BE-NEXT: vmov.i32 q9, #0x0 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vmov r12, r1, d16 +; CHECK-BE-NEXT: vmov d16, r3, r2 +; CHECK-BE-NEXT: vorr d17, d18, d18 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: str r1, [r0, #20] +; CHECK-BE-NEXT: vst1.32 {d16, d17}, [r0]! +; CHECK-BE-NEXT: str.w r12, [r0] +; CHECK-BE-NEXT: bx lr %b = shufflevector <3 x i64> %a, <3 x i64> zeroinitializer, <3 x i32> <i32 0, i32 3, i32 2> store <3 x i64> %b, ptr %p, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll index 028bb76c3d43..74782d44c742 100644 --- a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll +++ b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll @@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) { %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a) ret %v2f32 %1 } +; CHECK-LABEL: test_v2f32.tan:{{.*}} +define %v2f32 @test_v2f32.tan(%v2f32 %a) { + ; CHECK: tan + %1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a) + ret %v2f32 %1 +} ; CHECK-LABEL: test_v2f32.pow:{{.*}} define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) { ; CHECK: pow @@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 +declare %v2f32 @llvm.tan.v2f32(%v2f32) #0 declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0 declare %v2f32 @llvm.exp.v2f32(%v2f32) #0 declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0 @@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) { %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a) ret %v4f32 %1 } +; CHECK-LABEL: test_v4f32.tan:{{.*}} +define %v4f32 @test_v4f32.tan(%v4f32 %a) { + ; CHECK: tan + %1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a) + ret %v4f32 %1 +} ; CHECK-LABEL: test_v4f32.pow:{{.*}} define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) { ; CHECK: pow @@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0 declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0 declare %v4f32 @llvm.sin.v4f32(%v4f32) #0 declare %v4f32 @llvm.cos.v4f32(%v4f32) #0 +declare %v4f32 @llvm.tan.v4f32(%v4f32) #0 declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0 declare %v4f32 @llvm.exp.v4f32(%v4f32) #0 declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0 @@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) { %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a) ret %v2f64 %1 } +; CHECK-LABEL: test_v2f64.tan:{{.*}} +define %v2f64 @test_v2f64.tan(%v2f64 %a) { + ; CHECK: tan + %1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a) + ret %v2f64 %1 +} ; CHECK-LABEL: test_v2f64.pow:{{.*}} define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) { ; CHECK: pow @@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0 declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0 declare %v2f64 @llvm.sin.v2f64(%v2f64) #0 declare %v2f64 @llvm.cos.v2f64(%v2f64) #0 +declare %v2f64 @llvm.tan.v2f64(%v2f64) #0 declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0 declare %v2f64 @llvm.exp.v2f64(%v2f64) #0 declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0 |
