diff options
Diffstat (limited to 'llvm/test/CodeGen/ARM')
41 files changed, 9715 insertions, 858 deletions
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir index 77eeb34ef18c..4dd8af01f873 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir @@ -447,7 +447,7 @@ body: | ; CHECK-LABEL: name: test_vnmuls ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1 - ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $s0 = COPY [[VNMULS]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 @@ -477,7 +477,7 @@ body: | ; CHECK-LABEL: name: test_vnmuls_reassociate ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1 - ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $s0 = COPY [[VNMULS]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 @@ -507,7 +507,7 @@ body: | ; CHECK-LABEL: name: test_vnmuld ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1 - ; CHECK: [[VNMULD:%[0-9]+]]:dpr = VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VNMULD:%[0-9]+]]:dpr = nofpexcept VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $d0 = COPY [[VNMULD]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 @@ -539,7 +539,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1 ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2 - ; CHECK: [[VFNMAS:%[0-9]+]]:spr = VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VFNMAS:%[0-9]+]]:spr = 
nofpexcept VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $s0 = COPY [[VFNMAS]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 @@ -573,7 +573,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2 - ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = nofpexcept VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $d0 = COPY [[VFNMAD]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 @@ -607,7 +607,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1 ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2 - ; CHECK: [[VFMSS:%[0-9]+]]:spr = VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VFMSS:%[0-9]+]]:spr = nofpexcept VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $s0 = COPY [[VFMSS]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 @@ -640,7 +640,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2 - ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg + ; CHECK: [[VFMSD:%[0-9]+]]:dpr = nofpexcept VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $d0 = COPY [[VFMSD]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 @@ -673,7 +673,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1 ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2 - ; CHECK: [[VFNMSS:%[0-9]+]]:spr = VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK: [[VFNMSS:%[0-9]+]]:spr = nofpexcept VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, 
implicit $fpscr ; CHECK: $s0 = COPY [[VFNMSS]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir index c8fee5d33442..7cbe5de22deb 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir @@ -119,9 +119,10 @@ body: | ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]] %2(s16) = G_CTLZ %1 - ; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]] - ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] - ; CHECK: $r0 = COPY [[R]] + ; LIBCALLS: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]] + ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] + ; LIBCALLS: $r0 = COPY [[R]] + ; CLZ: $r0 = COPY [[R32]] %3(s32) = G_SEXT %2(s16) $r0 = COPY %3(s32) BX_RET 14, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir index 45a846b5b877..4cded131190f 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir @@ -19,7 +19,7 @@ body: | bb.1: ; CHECK-LABEL: name: test_fptosi ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0 - ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = VTOSIZS [[COPY]], 14 /* CC::al */, $noreg + ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]] ; CHECK: $r0 = COPY [[COPY1]] ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir index ec834f1233ac..4517fe6dd4f1 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir +++ 
b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -O0 -mtriple arm-- -mattr=+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2,+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | @@ -76,11 +77,9 @@ body: | ... --- name: test_fadd_s32 -# CHECK-LABEL: name: test_fadd_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -89,28 +88,29 @@ body: | bb.0: liveins: $s0, $s1 + ; CHECK-LABEL: name: test_fadd_s32 + ; CHECK: liveins: $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1 + ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VADDS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = COPY $s1 - ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1 %2(s32) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $s0 = COPY %2(s32) - ; CHECK: $s0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fadd_s64 -# CHECK-LABEL: name: test_fadd_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -119,28 +119,29 @@ body: | bb.0: liveins: $d0, $d1 + ; CHECK-LABEL: name: test_fadd_s64 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1 + ; CHECK-NEXT: [[VADDD:%[0-9]+]]:dpr = nofpexcept VADDD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VADDD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = COPY $d1 - ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1 %2(s64) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $d0 = COPY %2(s64) - ; CHECK: $d0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_fsub_s32 -# CHECK-LABEL: name: test_fsub_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -149,28 +150,29 @@ body: | bb.0: liveins: $s0, $s1 + ; CHECK-LABEL: name: test_fsub_s32 + ; CHECK: liveins: $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1 + ; CHECK-NEXT: [[VSUBS:%[0-9]+]]:spr = nofpexcept VSUBS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VSUBS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = COPY $s1 - ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1 %2(s32) = G_FSUB %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $s0 = COPY %2(s32) - ; CHECK: $s0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fsub_s64 -# CHECK-LABEL: name: test_fsub_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -179,28 +181,29 @@ body: | bb.0: liveins: $d0, $d1 + ; CHECK-LABEL: name: test_fsub_s64 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1 + ; CHECK-NEXT: [[VSUBD:%[0-9]+]]:dpr = nofpexcept VSUBD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VSUBD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = COPY $d1 - ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1 %2(s64) = G_FSUB %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $d0 = COPY %2(s64) - ; CHECK: $d0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_fmul_s32 -# CHECK-LABEL: name: test_fmul_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -209,28 +212,29 @@ body: | bb.0: liveins: $s0, $s1 + ; CHECK-LABEL: name: test_fmul_s32 + ; CHECK: liveins: $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1 + ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nofpexcept VMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VMULS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = COPY $s1 - ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1 %2(s32) = G_FMUL %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $s0 = COPY %2(s32) - ; CHECK: $s0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fmul_s64 -# CHECK-LABEL: name: test_fmul_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -239,28 +243,29 @@ body: | bb.0: liveins: $d0, $d1 + ; CHECK-LABEL: name: test_fmul_s64 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1 + ; CHECK-NEXT: [[VMULD:%[0-9]+]]:dpr = nofpexcept VMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VMULD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = COPY $d1 - ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1 %2(s64) = G_FMUL %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $d0 = COPY %2(s64) - ; CHECK: $d0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_fdiv_s32 -# CHECK-LABEL: name: test_fdiv_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -269,28 +274,29 @@ body: | bb.0: liveins: $s0, $s1 + ; CHECK-LABEL: name: test_fdiv_s32 + ; CHECK: liveins: $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1 + ; CHECK-NEXT: [[VDIVS:%[0-9]+]]:spr = nofpexcept VDIVS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VDIVS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = COPY $s1 - ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1 %2(s32) = G_FDIV %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $s0 = COPY %2(s32) - ; CHECK: $s0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fdiv_s64 -# CHECK-LABEL: name: test_fdiv_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -299,28 +305,29 @@ body: | bb.0: liveins: $d0, $d1 + ; CHECK-LABEL: name: test_fdiv_s64 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1 + ; CHECK-NEXT: [[VDIVD:%[0-9]+]]:dpr = nofpexcept VDIVD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VDIVD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = COPY $d1 - ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1 %2(s64) = G_FDIV %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $d0 = COPY %2(s64) - ; CHECK: $d0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... --- name: test_fneg_s32 -# CHECK-LABEL: name: test_fneg_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -328,25 +335,26 @@ body: | bb.0: liveins: $s0 + ; CHECK-LABEL: name: test_fneg_s32 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[VNEGS:%[0-9]+]]:spr = VNEGS [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $s0 = COPY [[VNEGS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = G_FNEG %0 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14 /* CC::al */, $noreg $s0 = COPY %1(s32) - ; CHECK: $s0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fneg_s64 -# CHECK-LABEL: name: test_fneg_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -355,25 +363,26 @@ body: | bb.0: liveins: $d0 + ; CHECK-LABEL: name: test_fneg_s64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[VNEGD:%[0-9]+]]:dpr = VNEGD [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $d0 = COPY [[VNEGD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = G_FNEG %0 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14 /* CC::al */, $noreg $d0 = COPY %1(s64) - ; CHECK: $d0 = COPY [[VREGSUM]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... --- name: test_fma_s32 -# CHECK-LABEL: name: test_fma_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -383,31 +392,32 @@ body: | bb.0: liveins: $s0, $s1, $s2 + ; CHECK-LABEL: name: test_fma_s32 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spr = COPY $s2 + ; CHECK-NEXT: [[VFMAS:%[0-9]+]]:spr = nofpexcept VFMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VFMAS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = COPY $s1 - ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1 %2(s32) = COPY $s2 - ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY $s2 %3(s32) = G_FMA %0, %1, %2 - ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $s0 = COPY %3(s32) - ; CHECK: $s0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: 
BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... --- name: test_fma_s64 -# CHECK-LABEL: name: test_fma_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -417,31 +427,32 @@ body: | bb.0: liveins: $d0, $d1, $d2 + ; CHECK-LABEL: name: test_fma_s64 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2 + ; CHECK-NEXT: [[VFMAD:%[0-9]+]]:dpr = nofpexcept VFMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VFMAD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s64) = COPY $d1 - ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1 %2(s64) = COPY $d2 - ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY $d2 %3(s64) = G_FMA %0, %1, %2 - ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg $d0 = COPY %3(s64) - ; CHECK: $d0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_fpext_s32_to_s64 -# CHECK-LABEL: name: test_fpext_s32_to_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -449,25 +460,26 @@ body: | bb.0: liveins: $s0 + ; CHECK-LABEL: name: test_fpext_s32_to_s64 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[VCVTDS:%[0-9]+]]:dpr = nofpexcept VCVTDS [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $d0 = COPY [[VCVTDS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s64) = G_FPEXT %0(s32) - ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14 /* CC::al */, $noreg $d0 = COPY %1(s64) - ; CHECK: $d0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... --- name: test_fptrunc_s64_to_s32 -# CHECK-LABEL: name: test_fptrunc_s64_to_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -475,25 +487,26 @@ body: | bb.0: liveins: $d0 + ; CHECK-LABEL: name: test_fptrunc_s64_to_s32 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[VCVTSD:%[0-9]+]]:spr = nofpexcept VCVTSD [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: $s0 = COPY [[VCVTSD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s32) = G_FPTRUNC %0(s64) - ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14 /* CC::al */, $noreg $s0 = COPY %1(s32) - ; CHECK: $s0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... 
--- name: test_fptosi_s32 -# CHECK-LABEL: name: test_fptosi_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: gprb } @@ -501,26 +514,27 @@ body: | bb.0: liveins: $s0 + ; CHECK-LABEL: name: test_fptosi_s32 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]] + ; CHECK-NEXT: $r0 = COPY [[COPY1]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = G_FPTOSI %0(s32) - ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZS [[VREGX]], 14 /* CC::al */, $noreg - ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]] $r0 = COPY %1(s32) - ; CHECK: $r0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $r0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... --- name: test_fptosi_s64 -# CHECK-LABEL: name: test_fptosi_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: gprb } @@ -528,26 +542,27 @@ body: | bb.0: liveins: $d0 + ; CHECK-LABEL: name: test_fptosi_s64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[VTOSIZD:%[0-9]+]]:spr = nofpexcept VTOSIZD [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZD]] + ; CHECK-NEXT: $r0 = COPY [[COPY1]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s32) = G_FPTOSI %0(s64) - ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZD [[VREGX]], 14 /* CC::al */, $noreg - ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]] $r0 = COPY %1(s32) - ; CHECK: $r0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $r0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... 
--- name: test_fptoui_s32 -# CHECK-LABEL: name: test_fptoui_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: gprb } @@ -555,26 +570,27 @@ body: | bb.0: liveins: $s0 + ; CHECK-LABEL: name: test_fptoui_s32 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[VTOUIZS:%[0-9]+]]:spr = nofpexcept VTOUIZS [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZS]] + ; CHECK-NEXT: $r0 = COPY [[COPY1]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0 %0(s32) = COPY $s0 - ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0 %1(s32) = G_FPTOUI %0(s32) - ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZS [[VREGX]], 14 /* CC::al */, $noreg - ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]] $r0 = COPY %1(s32) - ; CHECK: $r0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $r0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... --- name: test_fptoui_s64 -# CHECK-LABEL: name: test_fptoui_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: gprb } @@ -582,26 +598,27 @@ body: | bb.0: liveins: $d0 + ; CHECK-LABEL: name: test_fptoui_s64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0 + ; CHECK-NEXT: [[VTOUIZD:%[0-9]+]]:spr = nofpexcept VTOUIZD [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZD]] + ; CHECK-NEXT: $r0 = COPY [[COPY1]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0 %0(s64) = COPY $d0 - ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0 %1(s32) = G_FPTOUI %0(s64) - ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZD [[VREGX]], 14 /* CC::al */, $noreg - ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]] $r0 = COPY %1(s32) - ; CHECK: $r0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $r0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... 
--- name: test_sitofp_s32 -# CHECK-LABEL: name: test_sitofp_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -609,26 +626,27 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_sitofp_s32 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-NEXT: [[VSITOS:%[0-9]+]]:spr = nofpexcept VSITOS [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $s0 = COPY [[VSITOS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $r0 - ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0 %1(s32) = G_SITOFP %0(s32) - ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]] - ; CHECK: [[VREGR:%[0-9]+]]:spr = VSITOS [[VREGF]], 14 /* CC::al */, $noreg $s0 = COPY %1(s32) - ; CHECK: $s0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... --- name: test_sitofp_s64 -# CHECK-LABEL: name: test_sitofp_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -636,26 +654,27 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_sitofp_s64 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-NEXT: [[VSITOD:%[0-9]+]]:dpr = nofpexcept VSITOD [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $d0 = COPY [[VSITOD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s32) = COPY $r0 - ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0 %1(s64) = G_SITOFP %0(s32) - ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]] - ; CHECK: [[VREGR:%[0-9]+]]:dpr = VSITOD [[VREGF]], 14 /* CC::al */, $noreg $d0 = COPY %1(s64) - ; CHECK: $d0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_uitofp_s32 -# CHECK-LABEL: name: test_uitofp_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -663,26 +682,27 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_uitofp_s32 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-NEXT: [[VUITOS:%[0-9]+]]:spr = nofpexcept VUITOS [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $s0 = COPY [[VUITOS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(s32) = COPY $r0 - ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0 %1(s32) = G_UITOFP %0(s32) - ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]] - ; CHECK: [[VREGR:%[0-9]+]]:spr = VUITOS [[VREGF]], 14 /* CC::al */, $noreg $s0 = COPY %1(s32) - ; CHECK: $s0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... --- name: test_uitofp_s64 -# CHECK-LABEL: name: test_uitofp_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -690,26 +710,27 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_uitofp_s64 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-NEXT: [[VUITOD:%[0-9]+]]:dpr = nofpexcept VUITOD [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $d0 = COPY [[VUITOD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(s32) = COPY $r0 - ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0 %1(s64) = G_UITOFP %0(s32) - ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]] - ; CHECK: [[VREGR:%[0-9]+]]:dpr = VUITOD [[VREGF]], 14 /* CC::al */, $noreg $d0 = COPY %1(s64) - ; CHECK: $d0 = COPY [[VREGR]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_load_f32 -# CHECK-LABEL: name: test_load_f32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -717,25 +738,26 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_load_f32 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK-NEXT: $s0 = COPY [[VLDRS]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 %0(p0) = COPY $r0 - ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0 %1(s32) = G_LOAD %0(p0) :: (load (s32)) - ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg $s0 = COPY %1 - ; CHECK: $s0 = COPY %[[V]] BX_RET 14, $noreg, implicit $s0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 ... --- name: test_load_f64 -# CHECK-LABEL: name: test_load_f64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } @@ -743,45 +765,50 @@ body: | bb.0: liveins: $r0 + ; CHECK-LABEL: name: test_load_f64 + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[VLDRD:%[0-9]+]]:dpr = VLDRD [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s64)) + ; CHECK-NEXT: $d0 = COPY [[VLDRD]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0 %0(p0) = COPY $r0 - ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0 %1(s64) = G_LOAD %0(p0) :: (load (s64)) - ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg $d0 = COPY %1 - ; CHECK: $d0 = COPY %[[V]] BX_RET 14, $noreg, implicit $d0 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 ... 
--- name: test_stores -# CHECK-LABEL: name: test_stores legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: fprb } - { id: 2, class: fprb } -# CHECK: id: [[P:[0-9]+]], class: gpr -# CHECK: id: [[F32:[0-9]+]], class: spr -# CHECK: id: [[F64:[0-9]+]], class: dpr body: | bb.0: liveins: $r0, $s0, $d0 + ; CHECK-LABEL: name: test_stores + ; CHECK: liveins: $r0, $s0, $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2 + ; CHECK-NEXT: VSTRS [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) + ; CHECK-NEXT: VSTRD [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s64)) + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 %1(s32) = COPY $s0 %2(s64) = COPY $d2 G_STORE %1(s32), %0(p0) :: (store (s32)) - ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg G_STORE %2(s64), %0(p0) :: (store (s64)) - ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg ... @@ -833,11 +860,9 @@ body: | ... 
--- name: test_soft_fp_double -# CHECK-LABEL: name: test_soft_fp_double legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -848,24 +873,27 @@ body: | bb.0: liveins: $r0, $r1, $r2, $r3 + ; CHECK-LABEL: name: test_soft_fp_double + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r3 + ; CHECK-NEXT: [[VMOVDRR:%[0-9]+]]:dpr = VMOVDRR [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[VMOVDRR]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r0 = COPY [[VMOVRRD]] + ; CHECK-NEXT: $r1 = COPY [[VMOVRRD1]] + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1 %0(s32) = COPY $r2 - ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY $r2 %1(s32) = COPY $r3 - ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY $r3 %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) - ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]] %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) - ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]] $r0 = COPY %3 - ; CHECK: $r0 = COPY [[OUT1]] $r1 = COPY %4 - ; CHECK: $r1 = COPY [[OUT2]] BX_RET 14, $noreg, implicit $r0, implicit $r1 - ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1 ... 
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir index a6fc4dad49fd..fa982d8a60d7 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir @@ -31,7 +31,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2 - ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg + ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = nofpexcept VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK: $d0 = COPY [[VFNMSD]] ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0 %0:fprb(s64) = COPY $d0 diff --git a/llvm/test/CodeGen/ARM/and-mask-variable.ll b/llvm/test/CodeGen/ARM/and-mask-variable.ll new file mode 100644 index 000000000000..0f84b76f97a6 --- /dev/null +++ b/llvm/test/CodeGen/ARM/and-mask-variable.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M +; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A +; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T +; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M + +define i32 @mask_pair(i32 %x, i32 %y) { +; V7M-LABEL: mask_pair: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: mask_pair: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: mask_pair: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: mask_pair: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: bx lr + %shl = shl nsw i32 -1, %y + %and = and i32 %shl, %x + ret i32 %and +} + +define i64 @mask_pair_64(i64 
%x, i64 %y) { +; V7M-LABEL: mask_pair_64: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r12, r3, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl.w r12, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r3, r2 +; V7M-NEXT: and.w r0, r0, r12 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: mask_pair_64: +; V7A: @ %bb.0: +; V7A-NEXT: subs r12, r2, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lslpl r3, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: and r0, r2, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: mask_pair_64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r12, r3, r2 +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl.w r12, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r3, r2 +; V7A-T-NEXT: and.w r0, r0, r12 +; V7A-T-NEXT: ands r1, r3 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: mask_pair_64: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %shl = shl nsw i64 -1, %y + %and = and i64 %shl, %x + ret i64 %and +} diff --git a/llvm/test/CodeGen/ARM/bf16_fast_math.ll b/llvm/test/CodeGen/ARM/bf16_fast_math.ll index 1b18ea6feb2e..5f7e1e69d99d 100644 --- a/llvm/test/CodeGen/ARM/bf16_fast_math.ll +++ b/llvm/test/CodeGen/ARM/bf16_fast_math.ll @@ -17,7 +17,7 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed 
[[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -44,7 +44,7 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -71,7 +71,7 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; 
CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -102,7 +102,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -113,7 +113,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: 
$r0 = COPY [[VMOVRS1]] @@ -142,10 +142,10 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[MOVsi2:%[0-9]+]]:gpr = MOVsi [[COPY]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -174,7 +174,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: 
[[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]] @@ -185,7 +185,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg ; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg - ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg + ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg ; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]] diff --git a/llvm/test/CodeGen/ARM/build-attributes.ll b/llvm/test/CodeGen/ARM/build-attributes.ll index 68844aed0363..306a4a31b79f 100644 --- a/llvm/test/CodeGen/ARM/build-attributes.ll +++ b/llvm/test/CodeGen/ARM/build-attributes.ll @@ -3,23 +3,16 @@ ; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale -mattr=+strict-align | FileCheck %s --check-prefix=XSCALE ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6 -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6-FAST ; RUN: llc < %s -mtriple=armv6-linux-gnueabi 
-mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST ; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M -; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align | FileCheck %s --check-prefix=ARM1156T2F-S -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=ARM1156T2F-S-FAST ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7M-FAST ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all 
-enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V8-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING @@ -31,35 +24,24 @@ ; RUN: llc < %s -mtriple=thumbv8m.main-linux-gnueabi | FileCheck %s --check-prefix=V8MMAINLINE ; RUN: llc < %s -mtriple=thumbv8m.main-linux-gnueabi -mattr=+dsp | FileCheck %s --check-prefix=V8MMAINLINE_DSP ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,-d32 | FileCheck %s --check-prefix=CORTEX-A5-NONEON ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A5-NOFPU -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-NOFPU-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft | FileCheck %s 
--check-prefix=CORTEX-A8-SOFT -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A8-SOFT-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A8-HARD -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A8-HARD-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A8-SOFT ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-SOFT-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-HARD-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT -; RUN: llc < %s 
-mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A12-NOFPU -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-NOFPU-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15 -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A15-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 | FileCheck %s --check-prefix=CORTEX-A17-DEFAULT -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A17-NOFPU -; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-no-trapping-fp-math | FileCheck %s 
--check-prefix=NO-TRAPPING-MATH ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -denormal-fp-math=ieee | FileCheck %s --check-prefix=DENORMAL-IEEE @@ -74,37 +56,26 @@ ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus | FileCheck %s --check-prefix=CORTEX-M0PLUS -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0PLUS-FAST ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 | FileCheck %s --check-prefix=CORTEX-M1 -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M1-FAST ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align | FileCheck %s --check-prefix=SC000 -; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align 
-enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC000-FAST ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3 -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M3-FAST ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 | FileCheck %s --check-prefix=SC300 -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC300-FAST ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-SOFT-FAST ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD -; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-HARD-FAST ; RUN: llc < %s 
-mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT -; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-NOFPU-FAST ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE -; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=CORTEX-M23 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CORTEX-M33 -; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M33-FAST ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m35p | FileCheck %s --check-prefix=CORTEX-M35P @@ -113,49 +84,34 @@ ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4 ; RUN: llc < %s 
-mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 -; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R5-FAST ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 | FileCheck %s --check-prefix=CORTEX-R7 -; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R7-FAST ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 | FileCheck %s --check-prefix=CORTEX-R8 -; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R8-FAST ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 | FileCheck %s --check-prefix=CORTEX-A32 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A32-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi 
-mcpu=cortex-a35 | FileCheck %s --check-prefix=CORTEX-A35 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A35-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A53-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A57-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a73 | FileCheck %s --check-prefix=CORTEX-A73 ; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s 
--check-prefix=GENERIC-ARMV8_1-A ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 | FileCheck %s --check-prefix=EXYNOS-M3 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 | FileCheck %s --check-prefix=EXYNOS-M4 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 | FileCheck %s --check-prefix=EXYNOS-M5 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING -; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST ; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math 
-enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-CHECK-FAST ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2sp,-vfp3,-vfp4,-neon,-fp16 | FileCheck %s --check-prefix=CORTEX-A7-NOFPU -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2sp,-vfp3,-vfp4,-neon,-fp16 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,-d32,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER @@ -278,15 +234,6 @@ ; V6-NOT: .eabi_attribute 28 ; V6: .eabi_attribute 38, 1 -; V6-FAST-NOT: .eabi_attribute 19 -;; Despite the V6 CPU having no FPU by default, we chose to flush to -;; positive zero here. There's no hardware support doing this, but the -;; fast maths software library might. 
-; V6-FAST-NOT: .eabi_attribute 20 -; V6-FAST-NOT: .eabi_attribute 21 -; V6-FAST-NOT: .eabi_attribute 22 -; V6-FAST: .eabi_attribute 23, 1 - ;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for ;; V6-M, however we don't model the OS extension so this is fine. ; V6M: .eabi_attribute 6, 12 @@ -312,14 +259,6 @@ ; V6M-NOT: .eabi_attribute 28 ; V6M: .eabi_attribute 38, 1 -; V6M-FAST-NOT: .eabi_attribute 19 -;; Despite the V6M CPU having no FPU by default, we chose to flush to -;; positive zero here. There's no hardware support doing this, but the -;; fast maths software library might. -; V6M-FAST-NOT: .eabi_attribute 20 -; V6M-FAST-NOT: .eabi_attribute 21 -; V6M-FAST-NOT: .eabi_attribute 22 -; V6M-FAST: .eabi_attribute 23, 1 ; ARM1156T2F-S: .cpu arm1156t2f-s ; ARM1156T2F-S: .eabi_attribute 6, 8 @@ -342,14 +281,6 @@ ; ARM1156T2F-S-NOT: .eabi_attribute 28 ; ARM1156T2F-S: .eabi_attribute 38, 1 -; ARM1156T2F-S-FAST-NOT: .eabi_attribute 19 -;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally -;; valid for this core, it's an implementation defined question as to which of 0 and 2 you -;; select. LLVM historically picks 0. -; ARM1156T2F-S-FAST-NOT: .eabi_attribute 20 -; ARM1156T2F-S-FAST-NOT: .eabi_attribute 21 -; ARM1156T2F-S-FAST-NOT: .eabi_attribute 22 -; ARM1156T2F-S-FAST: .eabi_attribute 23, 1 ; V7M: .eabi_attribute 6, 10 ; V7M: .eabi_attribute 7, 77 @@ -374,15 +305,6 @@ ; V7M-NOT: .eabi_attribute 28 ; V7M: .eabi_attribute 38, 1 -; V7M-FAST-NOT: .eabi_attribute 19 -;; Despite the V7M CPU having no FPU by default, we chose to flush -;; preserving sign. This matches what the hardware would do in the -;; architecture revision were to exist on the current target. 
-; V7M-FAST: .eabi_attribute 20, 2 -; V7M-FAST-NOT: .eabi_attribute 21 -; V7M-FAST-NOT: .eabi_attribute 22 -; V7M-FAST: .eabi_attribute 23, 1 - ; V7: .syntax unified ; V7: .eabi_attribute 6, 10 ; V7-NOT: .eabi_attribute 27 @@ -401,13 +323,6 @@ ; V7-NOT: .eabi_attribute 28 ; V7: .eabi_attribute 38, 1 -; V7-FAST-NOT: .eabi_attribute 19 -;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes -;; denormals to zero preserving the sign. -; V7-FAST: .eabi_attribute 20, 2 -; V7-FAST-NOT: .eabi_attribute 21 -; V7-FAST-NOT: .eabi_attribute 22 -; V7-FAST: .eabi_attribute 23, 1 ; V7VE: .syntax unified ; V7VE: .eabi_attribute 6, 10 @ Tag_CPU_arch @@ -435,12 +350,6 @@ ; V8-NOT: .eabi_attribute 22 ; V8: .eabi_attribute 23, 3 -; V8-FAST-NOT: .eabi_attribute 19 -;; The default does have an FPU, and for V8-A, it flushes preserving sign. -; V8-FAST: .eabi_attribute 20, 2 -; V8-FAST-NOT: .eabi_attribute 21 -; V8-FAST-NOT: .eabi_attribute 22 -; V8-FAST: .eabi_attribute 23, 1 ; Vt8: .syntax unified ; Vt8: .eabi_attribute 6, 14 @@ -552,15 +461,11 @@ ;; We default to IEEE 754 compliance ; CORTEX-A7-CHECK: .eabi_attribute 20, 1 ;; The A7 has VFPv3 support by default, so flush preserving sign. -; CORTEX-A7-CHECK-FAST: .eabi_attribute 20, 2 ; CORTEX-A7-NOFPU: .eabi_attribute 20, 1 ;; Despite there being no FPU, we chose to flush to zero preserving ;; sign. This matches what the hardware would do for this architecture ;; revision. -; CORTEX-A7-NOFPU-FAST: .eabi_attribute 20, 2 ; CORTEX-A7-FPUV4: .eabi_attribute 20, 1 -;; The VFPv4 FPU flushes preserving sign. -; CORTEX-A7-FPUV4-FAST: .eabi_attribute 20, 2 ; Tag_ABI_FP_exceptions ; CORTEX-A7-CHECK: .eabi_attribute 21, 1 @@ -610,13 +515,6 @@ ; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 19 -;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math -;; is given. 
-; CORTEX-A5-DEFAULT-FAST: .eabi_attribute 20, 2 -; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 21 -; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 22 -; CORTEX-A5-DEFAULT-FAST: .eabi_attribute 23, 1 ; CORTEX-A5-NONEON: .cpu cortex-a5 ; CORTEX-A5-NONEON: .eabi_attribute 6, 10 @@ -634,13 +532,6 @@ ; CORTEX-A5-NONEON: .eabi_attribute 24, 1 ; CORTEX-A5-NONEON: .eabi_attribute 25, 1 -; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 19 -;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math -;; is given. -; CORTEX-A5-NONEON-FAST: .eabi_attribute 20, 2 -; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 21 -; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 22 -; CORTEX-A5-NONEON-FAST: .eabi_attribute 23, 1 ; CORTEX-A5-NOFPU: .cpu cortex-a5 ; CORTEX-A5-NOFPU: .eabi_attribute 6, 10 @@ -659,14 +550,9 @@ ; CORTEX-A5-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A5-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving ;; sign. This matches what the hardware would do for this architecture ;; revision. -; CORTEX-A5-NOFPU-FAST: .eabi_attribute 20, 2 -; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 21 -; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 22 -; CORTEX-A5-NOFPU-FAST: .eabi_attribute 23, 1 ; CORTEX-A8-SOFT: .cpu cortex-a8 ; CORTEX-A8-SOFT: .eabi_attribute 6, 10 @@ -712,15 +598,6 @@ ; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 ; CORTEX-A9-SOFT: .eabi_attribute 38, 1 -; CORTEX-A8-SOFT-FAST-NOT: .eabi_attribute 19 -; CORTEX-A9-SOFT-FAST-NOT: .eabi_attribute 19 -;; The A9 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. 
-; CORTEX-A8-SOFT-FAST: .eabi_attribute 20, 2 -; CORTEX-A9-SOFT-FAST: .eabi_attribute 20, 2 -; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 21 -; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 22 -; CORTEX-A5-SOFT-FAST: .eabi_attribute 23, 1 ; CORTEX-A8-HARD: .cpu cortex-a8 ; CORTEX-A8-HARD: .eabi_attribute 6, 10 @@ -766,21 +643,6 @@ ; CORTEX-A9-HARD: .eabi_attribute 28, 1 ; CORTEX-A9-HARD: .eabi_attribute 38, 1 -; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 19 -;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-A8-HARD-FAST: .eabi_attribute 20, 2 -; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 21 -; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 22 -; CORTEX-A8-HARD-FAST: .eabi_attribute 23, 1 - -; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 19 -;; The A9 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-A9-HARD-FAST: .eabi_attribute 20, 2 -; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 21 -; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 22 -; CORTEX-A9-HARD-FAST: .eabi_attribute 23, 1 ; CORTEX-A12-DEFAULT: .cpu cortex-a12 ; CORTEX-A12-DEFAULT: .eabi_attribute 6, 10 @@ -800,13 +662,6 @@ ; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A12-DEFAULT-FAST-NOT: .eabi_attribute 19 -;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-A12-DEFAULT-FAST: .eabi_attribute 20, 2 -; CORTEX-A12-HARD-FAST-NOT: .eabi_attribute 21 -; CORTEX-A12-HARD-FAST-NOT: .eabi_attribute 22 -; CORTEX-A12-HARD-FAST: .eabi_attribute 23, 1 ; CORTEX-A12-NOFPU: .cpu cortex-a12 ; CORTEX-A12-NOFPU: .eabi_attribute 6, 10 @@ -826,14 +681,6 @@ ; CORTEX-A12-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A12-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 19 -;; Despite there being no FPU, we chose to flush to zero preserving -;; sign. 
This matches what the hardware would do for this architecture -;; revision. -; CORTEX-A12-NOFPU-FAST: .eabi_attribute 20, 2 -; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 21 -; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 22 -; CORTEX-A12-NOFPU-FAST: .eabi_attribute 23, 1 ; CORTEX-A15: .cpu cortex-a15 ; CORTEX-A15: .eabi_attribute 6, 10 @@ -857,13 +704,6 @@ ; CORTEX-A15-NOT: .eabi_attribute 28 ; CORTEX-A15: .eabi_attribute 38, 1 -; CORTEX-A15-FAST-NOT: .eabi_attribute 19 -;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-A15-FAST: .eabi_attribute 20, 2 -; CORTEX-A15-FAST-NOT: .eabi_attribute 21 -; CORTEX-A15-FAST-NOT: .eabi_attribute 22 -; CORTEX-A15-FAST: .eabi_attribute 23, 1 ; CORTEX-A17-DEFAULT: .cpu cortex-a17 ; CORTEX-A17-DEFAULT: .eabi_attribute 6, 10 @@ -883,13 +723,6 @@ ; CORTEX-A17-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A17-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A17-FAST-NOT: .eabi_attribute 19 -;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-A17-FAST: .eabi_attribute 20, 2 -; CORTEX-A17-FAST-NOT: .eabi_attribute 21 -; CORTEX-A17-FAST-NOT: .eabi_attribute 22 -; CORTEX-A17-FAST: .eabi_attribute 23, 1 ; CORTEX-A17-NOFPU: .cpu cortex-a17 ; CORTEX-A17-NOFPU: .eabi_attribute 6, 10 @@ -910,13 +743,6 @@ ; CORTEX-A17-NOFPU: .eabi_attribute 25, 1 ; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19 -;; Despite there being no FPU, we chose to flush to zero preserving -;; sign. This matches what the hardware would do for this architecture -;; revision. 
-; CORTEX-A17-NOFPU-FAST: .eabi_attribute 20, 2 -; CORTEX-A17-NOFPU-FAST-NOT: .eabi_attribute 21 -; CORTEX-A17-NOFPU-FAST-NOT: .eabi_attribute 22 -; CORTEX-A17-NOFPU-FAST: .eabi_attribute 23, 1 ; Test flags -enable-no-trapping-fp-math and -denormal-fp-math: ; NO-TRAPPING-MATH: .eabi_attribute 21, 0 @@ -946,16 +772,6 @@ ; CORTEX-M0-NOT: .eabi_attribute 28 ; CORTEX-M0: .eabi_attribute 38, 1 -; CORTEX-M0-FAST-NOT: .eabi_attribute 19 -;; Despite the M0 CPU having no FPU in this scenario, we chose to -;; flush to positive zero here. There's no hardware support doing -;; this, but the fast maths software library might and such behaviour -;; would match hardware support on this architecture revision if it -;; existed. -; CORTEX-M0-FAST-NOT: .eabi_attribute 20 -; CORTEX-M0-FAST-NOT: .eabi_attribute 21 -; CORTEX-M0-FAST-NOT: .eabi_attribute 22 -; CORTEX-M0-FAST: .eabi_attribute 23, 1 ; CORTEX-M0PLUS: .cpu cortex-m0plus ; CORTEX-M0PLUS: .eabi_attribute 6, 12 @@ -978,16 +794,6 @@ ; CORTEX-M0PLUS-NOT: .eabi_attribute 28 ; CORTEX-M0PLUS: .eabi_attribute 38, 1 -; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19 -;; Despite the M0+ CPU having no FPU in this scenario, we chose to -;; flush to positive zero here. There's no hardware support doing -;; this, but the fast maths software library might and such behaviour -;; would match hardware support on this architecture revision if it -;; existed. -; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 20 -; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 21 -; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 22 -; CORTEX-M0PLUS-FAST: .eabi_attribute 23, 1 ; CORTEX-M1: .cpu cortex-m1 ; CORTEX-M1: .eabi_attribute 6, 12 @@ -1010,16 +816,6 @@ ; CORTEX-M1-NOT: .eabi_attribute 28 ; CORTEX-M1: .eabi_attribute 38, 1 -; CORTEX-M1-FAST-NOT: .eabi_attribute 19 -;; Despite the M1 CPU having no FPU in this scenario, we chose to -;; flush to positive zero here. 
There's no hardware support doing -;; this, but the fast maths software library might and such behaviour -;; would match hardware support on this architecture revision if it -;; existed. -; CORTEX-M1-FAST-NOT: .eabi_attribute 20 -; CORTEX-M1-FAST-NOT: .eabi_attribute 21 -; CORTEX-M1-FAST-NOT: .eabi_attribute 22 -; CORTEX-M1-FAST: .eabi_attribute 23, 1 ; SC000: .cpu sc000 ; SC000: .eabi_attribute 6, 12 @@ -1041,16 +837,6 @@ ; SC000-NOT: .eabi_attribute 28 ; SC000: .eabi_attribute 38, 1 -; SC000-FAST-NOT: .eabi_attribute 19 -;; Despite the SC000 CPU having no FPU in this scenario, we chose to -;; flush to positive zero here. There's no hardware support doing -;; this, but the fast maths software library might and such behaviour -;; would match hardware support on this architecture revision if it -;; existed. -; SC000-FAST-NOT: .eabi_attribute 20 -; SC000-FAST-NOT: .eabi_attribute 21 -; SC000-FAST-NOT: .eabi_attribute 22 -; SC000-FAST: .eabi_attribute 23, 1 ; CORTEX-M3: .cpu cortex-m3 ; CORTEX-M3: .eabi_attribute 6, 10 @@ -1073,14 +859,6 @@ ; CORTEX-M3-NOT: .eabi_attribute 28 ; CORTEX-M3: .eabi_attribute 38, 1 -; CORTEX-M3-FAST-NOT: .eabi_attribute 19 -;; Despite there being no FPU, we chose to flush to zero preserving -;; sign. This matches what the hardware would do for this architecture -;; revision. -; CORTEX-M3-FAST: .eabi_attribute 20, 2 -; CORTEX-M3-FAST-NOT: .eabi_attribute 21 -; CORTEX-M3-FAST-NOT: .eabi_attribute 22 -; CORTEX-M3-FAST: .eabi_attribute 23, 1 ; SC300: .cpu sc300 ; SC300: .eabi_attribute 6, 10 @@ -1103,14 +881,6 @@ ; SC300-NOT: .eabi_attribute 28 ; SC300: .eabi_attribute 38, 1 -; SC300-FAST-NOT: .eabi_attribute 19 -;; Despite there being no FPU, we chose to flush to zero preserving -;; sign. This matches what the hardware would do for this architecture -;; revision. 
-; SC300-FAST: .eabi_attribute 20, 2 -; SC300-FAST-NOT: .eabi_attribute 21 -; SC300-FAST-NOT: .eabi_attribute 22 -; SC300-FAST: .eabi_attribute 23, 1 ; CORTEX-M4-SOFT: .cpu cortex-m4 ; CORTEX-M4-SOFT: .eabi_attribute 6, 13 @@ -1134,13 +904,6 @@ ; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 ; CORTEX-M4-SOFT: .eabi_attribute 38, 1 -; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 19 -;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-M4-SOFT-FAST: .eabi_attribute 20, 2 -; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 21 -; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 22 -; CORTEX-M4-SOFT-FAST: .eabi_attribute 23, 1 ; CORTEX-M4-HARD: .cpu cortex-m4 ; CORTEX-M4-HARD: .eabi_attribute 6, 13 @@ -1164,13 +927,6 @@ ; CORTEX-M4-HARD: .eabi_attribute 28, 1 ; CORTEX-M4-HARD: .eabi_attribute 38, 1 -; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 19 -;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when -;; -ffast-math is specified. -; CORTEX-M4-HARD-FAST: .eabi_attribute 20, 2 -; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 21 -; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 22 -; CORTEX-M4-HARD-FAST: .eabi_attribute 23, 1 ; CORTEX-M7: .cpu cortex-m7 ; CORTEX-M7: .eabi_attribute 6, 13 @@ -1197,16 +953,6 @@ ; CORTEX-M7: .eabi_attribute 38, 1 ; CORTEX-M7: .eabi_attribute 14, 0 -; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19 -;; The M7 has the ARMv8 FP unit, which always flushes preserving sign. -; CORTEX-M7-FAST: .eabi_attribute 20, 2 -;; Despite there being no FPU, we chose to flush to zero preserving -;; sign. This matches what the hardware would do for this architecture -;; revision. 
-; CORTEX-M7-NOFPU-FAST: .eabi_attribute 20, 2 -; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 21 -; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 22 -; CORTEX-M7-NOFPU-FAST: .eabi_attribute 23, 1 ; CORTEX-R4: .cpu cortex-r4 ; CORTEX-R4: .eabi_attribute 6, 10 @@ -1273,12 +1019,6 @@ ; CORTEX-R5-NOT: .eabi_attribute 28 ; CORTEX-R5: .eabi_attribute 38, 1 -; CORTEX-R5-FAST-NOT: .eabi_attribute 19 -;; The R5 has the VFPv3 FP unit, which always flushes preserving sign. -; CORTEX-R5-FAST: .eabi_attribute 20, 2 -; CORTEX-R5-FAST-NOT: .eabi_attribute 21 -; CORTEX-R5-FAST-NOT: .eabi_attribute 22 -; CORTEX-R5-FAST: .eabi_attribute 23, 1 ; CORTEX-R7: .cpu cortex-r7 ; CORTEX-R7: .eabi_attribute 6, 10 @@ -1301,12 +1041,6 @@ ; CORTEX-R7-NOT: .eabi_attribute 28 ; CORTEX-R7: .eabi_attribute 38, 1 -; CORTEX-R7-FAST-NOT: .eabi_attribute 19 -;; The R7 has the VFPv3 FP unit, which always flushes preserving sign. -; CORTEX-R7-FAST: .eabi_attribute 20, 2 -; CORTEX-R7-FAST-NOT: .eabi_attribute 21 -; CORTEX-R7-FAST-NOT: .eabi_attribute 22 -; CORTEX-R7-FAST: .eabi_attribute 23, 1 ; CORTEX-R8: .cpu cortex-r8 ; CORTEX-R8: .eabi_attribute 6, 10 @@ -1329,12 +1063,6 @@ ; CORTEX-R8-NOT: .eabi_attribute 28 ; CORTEX-R8: .eabi_attribute 38, 1 -; CORTEX-R8-FAST-NOT: .eabi_attribute 19 -;; The R8 has the VFPv3 FP unit, which always flushes preserving sign. -; CORTEX-R8-FAST: .eabi_attribute 20, 2 -; CORTEX-R8-FAST-NOT: .eabi_attribute 21 -; CORTEX-R8-FAST-NOT: .eabi_attribute 22 -; CORTEX-R8-FAST: .eabi_attribute 23, 1 ; CORTEX-A32: .cpu cortex-a32 ; CORTEX-A32: .eabi_attribute 6, 14 @@ -1359,12 +1087,6 @@ ; CORTEX-A32-NOT: .eabi_attribute 28 ; CORTEX-A32: .eabi_attribute 38, 1 -; CORTEX-A32-FAST-NOT: .eabi_attribute 19 -;; The A32 has the ARMv8 FP unit, which always flushes preserving sign. 
-; CORTEX-A32-FAST: .eabi_attribute 20, 2 -; CORTEX-A32-FAST-NOT: .eabi_attribute 21 -; CORTEX-A32-FAST-NOT: .eabi_attribute 22 -; CORTEX-A32-FAST: .eabi_attribute 23, 1 ; CORTEX-M23: .cpu cortex-m23 ; CORTEX-M23: .eabi_attribute 6, 16 @@ -1430,11 +1152,6 @@ ; CORTEX-M35P: .eabi_attribute 38, 1 ; CORTEX-M35P: .eabi_attribute 14, 0 -; CORTEX-M33-FAST-NOT: .eabi_attribute 19 -; CORTEX-M33-FAST: .eabi_attribute 20, 2 -; CORTEX-M33-FAST-NOT: .eabi_attribute 21 -; CORTEX-M33-FAST-NOT: .eabi_attribute 22 -; CORTEX-M33-FAST: .eabi_attribute 23, 1 ; CORTEX-A35: .cpu cortex-a35 ; CORTEX-A35: .eabi_attribute 6, 14 @@ -1459,12 +1176,6 @@ ; CORTEX-A35-NOT: .eabi_attribute 28 ; CORTEX-A35: .eabi_attribute 38, 1 -; CORTEX-A35-FAST-NOT: .eabi_attribute 19 -;; The A35 has the ARMv8 FP unit, which always flushes preserving sign. -; CORTEX-A35-FAST: .eabi_attribute 20, 2 -; CORTEX-A35-FAST-NOT: .eabi_attribute 21 -; CORTEX-A35-FAST-NOT: .eabi_attribute 22 -; CORTEX-A35-FAST: .eabi_attribute 23, 1 ; CORTEX-A53: .cpu cortex-a53 ; CORTEX-A53: .eabi_attribute 6, 14 @@ -1489,12 +1200,6 @@ ; CORTEX-A53-NOT: .eabi_attribute 28 ; CORTEX-A53: .eabi_attribute 38, 1 -; CORTEX-A53-FAST-NOT: .eabi_attribute 19 -;; The A53 has the ARMv8 FP unit, which always flushes preserving sign. -; CORTEX-A53-FAST: .eabi_attribute 20, 2 -; CORTEX-A53-FAST-NOT: .eabi_attribute 21 -; CORTEX-A53-FAST-NOT: .eabi_attribute 22 -; CORTEX-A53-FAST: .eabi_attribute 23, 1 ; CORTEX-A57: .cpu cortex-a57 ; CORTEX-A57: .eabi_attribute 6, 14 @@ -1519,12 +1224,6 @@ ; CORTEX-A57-NOT: .eabi_attribute 28 ; CORTEX-A57: .eabi_attribute 38, 1 -; CORTEX-A57-FAST-NOT: .eabi_attribute 19 -;; The A57 has the ARMv8 FP unit, which always flushes preserving sign. 
-; CORTEX-A57-FAST: .eabi_attribute 20, 2 -; CORTEX-A57-FAST-NOT: .eabi_attribute 21 -; CORTEX-A57-FAST-NOT: .eabi_attribute 22 -; CORTEX-A57-FAST: .eabi_attribute 23, 1 ; CORTEX-A72: .cpu cortex-a72 ; CORTEX-A72: .eabi_attribute 6, 14 @@ -1549,12 +1248,6 @@ ; CORTEX-A72-NOT: .eabi_attribute 28 ; CORTEX-A72: .eabi_attribute 38, 1 -; CORTEX-A72-FAST-NOT: .eabi_attribute 19 -;; The A72 has the ARMv8 FP unit, which always flushes preserving sign. -; CORTEX-A72-FAST: .eabi_attribute 20, 2 -; CORTEX-A72-FAST-NOT: .eabi_attribute 21 -; CORTEX-A72-FAST-NOT: .eabi_attribute 22 -; CORTEX-A72-FAST: .eabi_attribute 23, 1 ; CORTEX-A73: .cpu cortex-a73 ; CORTEX-A73: .eabi_attribute 6, 14 @@ -1580,12 +1273,6 @@ ; CORTEX-A73: .eabi_attribute 38, 1 ; CORTEX-A73: .eabi_attribute 14, 0 -; EXYNOS-FAST-NOT: .eabi_attribute 19 -;; The Exynos processors have the ARMv8 FP unit, which always flushes preserving sign. -; EXYNOS-FAST: .eabi_attribute 20, 2 -; EXYNOS-FAST-NOT: .eabi_attribute 21 -; EXYNOS-FAST-NOT: .eabi_attribute 22 -; EXYNOS-FAST: .eabi_attribute 23, 1 ; EXYNOS-M3: .cpu exynos-m3 ; EXYNOS-M3: .eabi_attribute 6, 14 @@ -1684,12 +1371,6 @@ ; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28 ; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1 -; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19 -;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign. 
-; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 20, 2 -; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 21 -; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 22 -; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 23, 1 ; RELOC-PIC: .eabi_attribute 15, 1 ; RELOC-PIC: .eabi_attribute 16, 1 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll new file mode 100644 index 000000000000..972a4708994d --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll @@ -0,0 +1,39 @@ +;; Test if a potential indirect call target function which has internal linkage and +;; address taken has its type ID emitted to callgraph section. +;; This test also makes sure that callback functions which meet the above constraint +;; are handled correctly. + +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @_Z6doWorkPFviE(ptr) + +define i32 @_Z4testv() !type !1 { +entry: + call void @_Z6doWorkPFviE(ptr nonnull @_ZL10myCallbacki) + ret i32 0 +} + +; CHECK: _ZL10myCallbacki: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define internal void @_ZL10myCallbacki(i32 %value) !type !2 { +entry: + %sink = alloca i32, align 4 + store volatile i32 %value, ptr %sink, align 4 + %i1 = load volatile i32, ptr %sink, align 4 + ret void +} + +!0 = !{i64 0, !"_ZTSFvPFviEE.generalized"} +!1 = !{i64 0, !"_ZTSFivE.generalized"} +!2 = !{i64 0, !"_ZTSFviE.generalized"} + +; CHECK: .section .llvm.callgraph,"o",%progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. 
+; CHECK-NEXT: .byte 1 +;; Function Entry PC +; CHECK-NEXT: .long [[LABEL_FUNC]] +;; Function type ID -5212364466660467813 +; CHECK-NEXT: .long 1154849691 +; CHECK-NEXT: .long 3081369122 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll new file mode 100644 index 000000000000..ec8d5b8ad94a --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll @@ -0,0 +1,63 @@ +;; Test if temporary labels are generated for each indirect callsite. +;; Test if, in the .llvm.callgraph section, the MD5 hash of each callee's type (type id) +;; is correctly paired with its corresponding temporary label generated for indirect +;; call sites annotated with !callee_type metadata. +;; Test if the .llvm.callgraph section contains unique direct callees. + +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @direct_foo() +declare !type !1 i32 @direct_bar(i8) +declare !type !2 ptr @direct_baz(ptr) + +; CHECK: ball: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define ptr @ball() { +entry: + call void @direct_foo() + %fp_foo_val = load ptr, ptr null, align 8 + call void (...)
%fp_foo_val(), !callee_type !0 + call void @direct_foo() + %fp_bar_val = load ptr, ptr null, align 8 + %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2 + %call_fp_bar_direct = call i32 @direct_bar(i8 1) + %fp_baz_val = load ptr, ptr null, align 8 + %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 + call void @direct_foo() + %call_fp_baz_direct = call ptr @direct_baz(ptr null) + call void @direct_foo() + ret ptr %call_fp_baz +} + +!0 = !{!1} +!1 = !{i64 0, !"_ZTSFvE.generalized"} +!2 = !{!3} +!3 = !{i64 0, !"_ZTSFicE.generalized"} +!4 = !{!5} +!5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +; CHECK: .section .llvm.callgraph,"o",%progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags +; CHECK-NEXT: .byte 7 +;; Function Entry PC +; CHECK-NEXT: .long [[LABEL_FUNC]] +;; Function type ID -- set to 0 as no type metadata attached to function. +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 0 +;; Number of unique direct callees. +; CHECK-NEXT: .byte 3 +;; Direct callees. +; CHECK-NEXT: .long direct_foo +; CHECK-NEXT: .long direct_bar +; CHECK-NEXT: .long direct_baz +;; Number of unique indirect target type IDs. +; CHECK-NEXT: .byte 3 +;; Indirect type IDs. +; CHECK-NEXT: .long 838288420 +; CHECK-NEXT: .long 1053552373 +; CHECK-NEXT: .long 1505527380 +; CHECK-NEXT: .long 814631809 +; CHECK-NEXT: .long 342417018 +; CHECK-NEXT: .long 2013108216 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll new file mode 100644 index 000000000000..80360041c106 --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll @@ -0,0 +1,34 @@ +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls. 
+ +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s + +define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { +entry: + %call = tail call i32 %func(i8 signext %x), !callee_type !1 + ret i32 %call +} + +define i32 @main(i32 %argc) !type !3 { +entry: + %andop = and i32 %argc, 1 + %cmp = icmp eq i32 %andop, 0 + %foo.bar = select i1 %cmp, ptr @foo, ptr @bar + %call.i = tail call i32 %foo.bar(i8 signext 97), !callee_type !1 + ret i32 %call.i +} + +declare !type !2 i32 @foo(i8 signext) + +declare !type !2 i32 @bar(i8 signext) + +!0 = !{i64 0, !"_ZTSFiPvcE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +!3 = !{i64 0, !"_ZTSFiiE.generalized"} + +; CHECK: Hex dump of section '.llvm.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154 +; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8 +;; Verify that the type id 0x308e4b8159bc8654 is in section. +; CHECK-NEXT: 0x00000020 3e0cfe3c b2015486 bc59814b 8e30 diff --git a/llvm/test/CodeGen/ARM/call-graph-section.ll b/llvm/test/CodeGen/ARM/call-graph-section.ll new file mode 100644 index 000000000000..167cc6f3c73b --- /dev/null +++ b/llvm/test/CodeGen/ARM/call-graph-section.ll @@ -0,0 +1,37 @@ +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file. + +; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s + +declare !type !0 void @foo() + +declare !type !1 i32 @bar(i8) + +declare !type !2 ptr @baz(ptr) + +define void @main() { +entry: + %fp_foo_val = load ptr, ptr null, align 8 + call void (...) 
%fp_foo_val(), !callee_type !1 + %fp_bar_val = load ptr, ptr null, align 8 + %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !3 + %fp_baz_val = load ptr, ptr null, align 8 + %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 + ret void +} + +;; Check that the numeric type ids (md5 hashes) for the type ids below are emitted +;; to the callgraph section. +!0 = !{i64 0, !"_ZTSFvE.generalized"} +!1 = !{!0} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +!3 = !{!2} +!4 = !{!5} +!5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +;; Make sure the following type IDs (stored little-endian) are in the call graph section: +;; 0x3ecbeef531f74424, 0x308e4b8159bc8654, 0x77fd97f81468de7a +; CHECK: Hex dump of section '.llvm.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000324 +; CHECK-NEXT: 0x00000010 44f731f5 eecb3e54 86bc5981 4b8e307a +; CHECK-NEXT: 0x00000020 de6814f8 97fd77 diff --git a/llvm/test/CodeGen/ARM/carry.ll b/llvm/test/CodeGen/ARM/carry.ll index 558e2b0e43f7..a652241dac5b 100644 --- a/llvm/test/CodeGen/ARM/carry.ll +++ b/llvm/test/CodeGen/ARM/carry.ll @@ -1,61 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s define i64 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: -; CHECK: subs r -; CHECK: sbc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbc r1, r1, r3 +; CHECK-NEXT: bx lr entry: - %tmp = sub i64 %a, %b - ret i64 %tmp + %tmp = sub i64 %a, %b + ret i64 %tmp } define i64 @f2(i64 %a, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: lsl r -; CHECK: orr r -; CHECK: rsbs r -; CHECK: sbc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsl r1, r1, #1 +; CHECK-NEXT: orr r1, r1, r0, lsr #31 +; CHECK-NEXT: rsbs r0, r2, r0, lsl #1 +; CHECK-NEXT: sbc r1, r1, r3 +; CHECK-NEXT: bx lr entry: - %tmp1 = shl i64 %a, 1 - %tmp2 = sub i64 %tmp1, %b - ret i64 %tmp2 + %tmp1 = shl i64 %a, 1 + %tmp2 = sub i64 %tmp1, %b + ret i64 %tmp2 } ; add with live carry define i64
@f3(i32 %al, i32 %bl) { ; CHECK-LABEL: f3: -; CHECK: adds r -; CHECK: adc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, r0, r1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: adcs r0, r1, #0 +; CHECK-NEXT: adc r1, r2, #0 +; CHECK-NEXT: bx lr entry: - ; unsigned wide add - %aw = zext i32 %al to i64 - %bw = zext i32 %bl to i64 - %cw = add i64 %aw, %bw - ; ch == carry bit - %ch = lshr i64 %cw, 32 - %dw = add i64 %ch, %bw - ret i64 %dw + ; unsigned wide add + %aw = zext i32 %al to i64 + %bw = zext i32 %bl to i64 + %cw = add i64 %aw, %bw + ; ch == carry bit + %ch = lshr i64 %cw, 32 + %dw = add i64 %ch, %bw + ret i64 %dw } ; rdar://10073745 define i64 @f4(i64 %x) nounwind readnone { -entry: ; CHECK-LABEL: f4: -; CHECK: rsbs r -; CHECK: rsc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: rsc r1, r1, #0 +; CHECK-NEXT: bx lr +entry: %0 = sub nsw i64 0, %x ret i64 %0 } ; rdar://12559385 define i64 @f5(i32 %vi) { -entry: ; CHECK-LABEL: f5: -; CHECK: movw [[REG:r[0-9]+]], #36102 -; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]] - %v0 = zext i32 %vi to i64 - %v1 = xor i64 %v0, -155057456198619 - %v4 = add i64 %v1, 155057456198619 - %v5 = add i64 %v4, %v1 - ret i64 %v5 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r1, #19493 +; CHECK-NEXT: movw r2, #29433 +; CHECK-NEXT: movt r1, #57191 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: movw r3, #46043 +; CHECK-NEXT: movt r2, #65535 +; CHECK-NEXT: adds r0, r0, r0 +; CHECK-NEXT: movw r1, #36102 +; CHECK-NEXT: sbc r2, r2, r1 +; CHECK-NEXT: movt r3, #8344 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adc r1, r2, r1 +; CHECK-NEXT: bx lr +entry: + %v0 = zext i32 %vi to i64 + %v1 = xor i64 %v0, -155057456198619 + %v4 = add i64 %v1, 155057456198619 + %v5 = add i64 %v4, %v1 + ret i64 %v5 } diff --git a/llvm/test/CodeGen/ARM/combine-movc-sub.ll b/llvm/test/CodeGen/ARM/combine-movc-sub.ll index ca5d08944354..8ca4c4320987 100644 --- a/llvm/test/CodeGen/ARM/combine-movc-sub.ll +++ 
b/llvm/test/CodeGen/ARM/combine-movc-sub.ll @@ -27,11 +27,11 @@ define hidden fastcc ptr @test(ptr %Search, ptr %ClauseList, i32 %Level, ptr noc ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: sub.w r7, r2, #32 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: sub.w r8, r2, #32 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: add.w r6, r0, r7, lsr #5 +; CHECK-NEXT: add.w r7, r0, r8, lsr #5 ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: b .LBB0_2 @@ -44,16 +44,16 @@ define hidden fastcc ptr @test(ptr %Search, ptr %ClauseList, i32 %Level, ptr noc ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: cmp r4, #31 ; CHECK-NEXT: ldr r0, [r1, #16] -; CHECK-NEXT: add.w r0, r0, r6, lsl #2 +; CHECK-NEXT: add.w r0, r0, r7, lsl #2 ; CHECK-NEXT: ldr r0, [r0, #40] ; CHECK-NEXT: it hi -; CHECK-NEXT: andhi r2, r7, #31 +; CHECK-NEXT: andhi r2, r8, #31 ; CHECK-NEXT: lsrs r0, r2 ; CHECK-NEXT: lsls r0, r0, #31 ; CHECK-NEXT: beq .LBB0_1 ; CHECK-NEXT: @ %bb.3: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl foo ; CHECK-NEXT: str.w r9, [r5, #4] ; CHECK-NEXT: b .LBB0_1 diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir index 1bee32f4c90c..fe23e8594c94 100644 --- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir +++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir @@ -22,15 +22,16 @@ body: | ; CHECK-LABEL: name: test_groups ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4 - ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg - ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg - ; CHECK: t2STRi12 killed 
renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm + ; CHECK-NEXT: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg + ; CHECK-NEXT: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0 renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg - renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg + renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/extract-bits.ll b/llvm/test/CodeGen/ARM/extract-bits.ll new file mode 100644 index 000000000000..d717806098fb --- /dev/null +++ b/llvm/test/CodeGen/ARM/extract-bits.ll @@ -0,0 +1,4591 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M +; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A +; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T +; RUN: llc 
-mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M + +; Patterns: +; a) (x >> start) & (1 << nbits) - 1 +; b) (x >> start) & ~(-1 << nbits) +; c) (x >> start) & (-1 >> (32 - y)) +; d) (x >> start) << (32 - y) >> (32 - y) +; are equivalent. + +; ---------------------------------------------------------------------------- ; +; Pattern a. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_a0: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_a0: +; V7A: @ %bb.0: +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, lsr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a0: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: subs r1, r1, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_a0_arithmetic: +; V7M: @ %bb.0: +; V7M-NEXT: asrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_a0_arithmetic: +; V7A: @ %bb.0: +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, asr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a0_arithmetic: +; 
V7A-T: @ %bb.0: +; V7A-T-NEXT: asrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a0_arithmetic: +; V6M: @ %bb.0: +; V6M-NEXT: asrs r0, r1 +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: subs r1, r1, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %shifted = ashr i32 %val, %numskipbits + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr32_a1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_a1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, lsr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: subs r1, r1, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_a2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; 
+; V7A-LABEL: bextr32_a2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, lsr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a2_load: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: lsrs r3, r1 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: ands r0, r3 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %shifted = lshr i32 %val, %numskipbits + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr32_a3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_a3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, lsr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: lsrs r3, r1 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: ands r0, r3 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %skip = zext i8 %numskipbits to i32 + %shifted = 
lshr i32 %val, %skip + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_a4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: movs r1, #1 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_a4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: mov r12, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r2, r3, r12, lsl r2 +; V7A-NEXT: and r0, r2, r0, lsr r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_a4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsls r1, r2 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_a4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: subs r1, r1, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %shifted, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_a0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: mov.w lr, #1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: rsb.w r4, r12, #32 +; V7M-NEXT: subs.w r3, r12, #32 +; V7M-NEXT: lsr.w r4, lr, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r4, lr, r3 +; V7M-NEXT: lsl.w r3, lr, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: subs r3, #1 +; V7M-NEXT: sbc r12, r4, #0 +; V7M-NEXT: rsb.w r4, r2, #32 +; V7M-NEXT: lsl.w r4, r1, r4 +; V7M-NEXT: orrs r0, r4 +; V7M-NEXT: subs.w r4, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r4 
+; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: and.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: and.w r1, r1, r12 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_a0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr lr, [sp, #16] +; V7A-NEXT: mov r5, #1 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r12, lr, #32 +; V7A-NEXT: subs r4, lr, #32 +; V7A-NEXT: lsr r3, r5, r12 +; V7A-NEXT: lslpl r3, r5, r4 +; V7A-NEXT: lsl r5, r5, lr +; V7A-NEXT: movwpl r5, #0 +; V7A-NEXT: rsb r4, r2, #32 +; V7A-NEXT: subs r5, r5, #1 +; V7A-NEXT: sbc r3, r3, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r4 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r4 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: and r0, r5, r0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_a0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: mov.w lr, #1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: rsb.w r4, r12, #32 +; V7A-T-NEXT: subs.w r3, r12, #32 +; V7A-T-NEXT: lsr.w r4, lr, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, lr, r3 +; V7A-T-NEXT: lsl.w r3, lr, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: subs r3, #1 +; V7A-T-NEXT: sbc r12, r4, #0 +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: lsl.w r4, r1, r4 +; V7A-T-NEXT: orrs r0, r4 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: and.w r1, r1, r12 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_a0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #12 +; V6M-NEXT: sub sp, #12 +; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill +; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill +; V6M-NEXT: mov r6, 
r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: ldr r2, [sp, #32] +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: subs r5, r0, #1 +; V6M-NEXT: sbcs r4, r7 +; V6M-NEXT: mov r0, r6 +; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: add sp, #12 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %shifted = lshr i64 %val, %numskipbits + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_a0_arithmetic: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: mov.w lr, #1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: rsb.w r4, r12, #32 +; V7M-NEXT: subs.w r3, r12, #32 +; V7M-NEXT: lsr.w r4, lr, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r4, lr, r3 +; V7M-NEXT: lsl.w r3, lr, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: subs r3, #1 +; V7M-NEXT: sbc r12, r4, #0 +; V7M-NEXT: rsb.w r4, r2, #32 +; V7M-NEXT: lsl.w r4, r1, r4 +; V7M-NEXT: orrs r0, r4 +; V7M-NEXT: subs.w r4, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: asrpl.w r0, r1, r4 +; V7M-NEXT: asr.w r2, r1, r2 +; V7M-NEXT: and.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: asrpl r2, r1, #31 +; V7M-NEXT: and.w r1, r12, r2 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_a0_arithmetic: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr lr, [sp, #16] +; V7A-NEXT: mov r5, #1 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r12, lr, #32 +; V7A-NEXT: subs r4, lr, #32 +; V7A-NEXT: lsr r3, r5, r12 +; V7A-NEXT: lslpl r3, r5, r4 +; V7A-NEXT: lsl r5, r5, lr +; V7A-NEXT: movwpl r5, #0 +; V7A-NEXT: rsb r4, r2, #32 +; V7A-NEXT: subs r5, r5, #1 +; V7A-NEXT: sbc 
r3, r3, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r4 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: asr r2, r1, r2 +; V7A-NEXT: asrpl r2, r1, #31 +; V7A-NEXT: asrpl r0, r1, r4 +; V7A-NEXT: and r1, r3, r2 +; V7A-NEXT: and r0, r5, r0 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_a0_arithmetic: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: mov.w lr, #1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: rsb.w r4, r12, #32 +; V7A-T-NEXT: subs.w r3, r12, #32 +; V7A-T-NEXT: lsr.w r4, lr, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, lr, r3 +; V7A-T-NEXT: lsl.w r3, lr, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: subs r3, #1 +; V7A-T-NEXT: sbc r12, r4, #0 +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: lsl.w r4, r1, r4 +; V7A-T-NEXT: orrs r0, r4 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: asrpl.w r0, r1, r4 +; V7A-T-NEXT: asr.w r2, r1, r2 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: asrpl r2, r1, #31 +; V7A-T-NEXT: and.w r1, r12, r2 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_a0_arithmetic: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #12 +; V6M-NEXT: sub sp, #12 +; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill +; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: ldr r2, [sp, #32] +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: subs r5, r0, #1 +; V6M-NEXT: sbcs r4, r7 +; V6M-NEXT: mov r0, r6 +; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_lasr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: add sp, #12 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %shifted = ashr i64 %val, %numskipbits + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 
+ %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr64_a1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: rsb.w r4, r3, #32 +; V7M-NEXT: mov.w lr, #1 +; V7M-NEXT: subs.w r12, r3, #32 +; V7M-NEXT: lsl.w r3, lr, r3 +; V7M-NEXT: lsr.w r4, lr, r4 +; V7M-NEXT: lsr.w r0, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r4, lr, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: subs r3, #1 +; V7M-NEXT: sbc r12, r4, #0 +; V7M-NEXT: rsb.w r4, r2, #32 +; V7M-NEXT: lsl.w r4, r1, r4 +; V7M-NEXT: orrs r0, r4 +; V7M-NEXT: subs.w r4, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r4 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: and.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: and.w r1, r1, r12 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_a1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, lr} +; V7A-NEXT: push {r4, lr} +; V7A-NEXT: rsb r12, r3, #32 +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: subs r4, r3, #32 +; V7A-NEXT: lsl r3, lr, r3 +; V7A-NEXT: lsr r12, lr, r12 +; V7A-NEXT: movwpl r3, #0 +; V7A-NEXT: lslpl r12, lr, r4 +; V7A-NEXT: rsb r4, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: subs r3, r3, #1 +; V7A-NEXT: sbc r12, r12, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r4 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r4 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: and r1, r12, r1 +; V7A-NEXT: pop {r4, pc} +; +; V7A-T-LABEL: bextr64_a1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r4, r3, #32 +; V7A-T-NEXT: mov.w lr, #1 +; V7A-T-NEXT: subs.w r12, r3, #32 +; V7A-T-NEXT: lsl.w r3, lr, r3 +; V7A-T-NEXT: lsr.w r4, lr, r4 +; V7A-T-NEXT: lsr.w r0, r0, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, lr, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: 
movpl r3, #0 +; V7A-T-NEXT: subs r3, #1 +; V7A-T-NEXT: sbc r12, r4, #0 +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: lsl.w r4, r1, r4 +; V7A-T-NEXT: orrs r0, r4 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: and.w r1, r1, r12 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_a1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #12 +; V6M-NEXT: sub sp, #12 +; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill +; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: mov r2, r3 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: subs r5, r0, #1 +; V6M-NEXT: sbcs r4, r7 +; V6M-NEXT: mov r0, r6 +; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: add sp, #12 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_a2_load: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: mov.w lr, #1 +; V7M-NEXT: rsb.w r1, r12, #32 +; V7M-NEXT: subs.w r3, r12, #32 +; V7M-NEXT: lsr.w r1, lr, r1 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, lr, r3 +; V7M-NEXT: lsl.w r3, lr, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: subs.w lr, r3, #1 +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: sbc r12, r1, #0 +; V7M-NEXT: rsb.w r1, 
r2, #32 +; V7M-NEXT: lsl.w r1, r3, r1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: orrs r0, r1 +; V7M-NEXT: subs.w r1, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r3, r1 +; V7M-NEXT: lsr.w r1, r3, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: and.w r0, r0, lr +; V7M-NEXT: and.w r1, r1, r12 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_a2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r6, lr} +; V7A-NEXT: push {r4, r5, r6, lr} +; V7A-NEXT: ldr r1, [sp, #16] +; V7A-NEXT: mov r3, #1 +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: ldr r5, [r0, #4] +; V7A-NEXT: rsb r0, r1, #32 +; V7A-NEXT: subs r4, r1, #32 +; V7A-NEXT: lsl r1, r3, r1 +; V7A-NEXT: lsr r0, r3, r0 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: lslpl r0, r3, r4 +; V7A-NEXT: subs r1, r1, #1 +; V7A-NEXT: sbc r3, r0, #0 +; V7A-NEXT: lsr r0, r6, r2 +; V7A-NEXT: rsb r6, r2, #32 +; V7A-NEXT: orr r0, r0, r5, lsl r6 +; V7A-NEXT: subs r6, r2, #32 +; V7A-NEXT: lsrpl r0, r5, r6 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: lsr r1, r5, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: pop {r4, r5, r6, pc} +; +; V7A-T-LABEL: bextr64_a2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: movs r3, #1 +; V7A-T-NEXT: ldrd lr, r1, [r0] +; V7A-T-NEXT: rsb.w r4, r12, #32 +; V7A-T-NEXT: subs.w r0, r12, #32 +; V7A-T-NEXT: lsr.w r4, r3, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, r3, r0 +; V7A-T-NEXT: lsl.w r0, r3, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsr.w r3, lr, r2 +; V7A-T-NEXT: subs r0, #1 +; V7A-T-NEXT: sbc r12, r4, #0 +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: lsl.w r4, r1, r4 +; V7A-T-NEXT: orrs r3, r4 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r3, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: and.w r1, r1, r12 +; V7A-T-NEXT: pop {r4, pc} +; +; 
V6M-LABEL: bextr64_a2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #4 +; V6M-NEXT: sub sp, #4 +; V6M-NEXT: str r2, [sp] @ 4-byte Spill +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: ldr r2, [sp, #24] +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r6, r1 +; V6M-NEXT: subs r4, r0, #1 +; V6M-NEXT: sbcs r6, r7 +; V6M-NEXT: ldm r5!, {r0, r1} +; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: ands r1, r6 +; V6M-NEXT: add sp, #4 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %val = load i64, ptr %w + %shifted = lshr i64 %val, %numskipbits + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr64_a3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: mov.w r12, #1 +; V7M-NEXT: subs.w lr, r2, #32 +; V7M-NEXT: lsl.w r2, r12, r2 +; V7M-NEXT: lsr.w r3, r12, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r12, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs.w lr, r2, #1 +; V7M-NEXT: ldrd r0, r2, [r0] +; V7M-NEXT: sbc r12, r3, #0 +; V7M-NEXT: rsb.w r3, r1, #32 +; V7M-NEXT: lsl.w r3, r2, r3 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r1, #32 +; V7M-NEXT: lsr.w r1, r2, r1 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r2, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: and.w r0, r0, lr +; V7M-NEXT: and.w r1, r1, r12 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_a3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r6, lr} +; V7A-NEXT: push {r4, r5, r6, lr} +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: mov r3, #1 +; V7A-NEXT: ldr r5, [r0, #4] +; V7A-NEXT: rsb 
r0, r2, #32 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lsr r0, r3, r0 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: lslpl r0, r3, r4 +; V7A-NEXT: subs r3, r2, #1 +; V7A-NEXT: sbc r0, r0, #0 +; V7A-NEXT: lsr r2, r5, r1 +; V7A-NEXT: subs r4, r1, #32 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: and r2, r0, r2 +; V7A-NEXT: lsr r0, r6, r1 +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: orr r0, r0, r5, lsl r1 +; V7A-NEXT: mov r1, r2 +; V7A-NEXT: lsrpl r0, r5, r4 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: pop {r4, r5, r6, pc} +; +; V7A-T-LABEL: bextr64_a3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: mov.w lr, #1 +; V7A-T-NEXT: subs.w r3, r2, #32 +; V7A-T-NEXT: lsl.w r2, lr, r2 +; V7A-T-NEXT: lsr.w r4, lr, r4 +; V7A-T-NEXT: ldrd r12, r0, [r0] +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, lr, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: subs.w lr, r2, #1 +; V7A-T-NEXT: sbc r2, r4, #0 +; V7A-T-NEXT: lsr.w r4, r0, r1 +; V7A-T-NEXT: subs.w r3, r1, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r4, #0 +; V7A-T-NEXT: and.w r2, r2, r4 +; V7A-T-NEXT: rsb.w r4, r1, #32 +; V7A-T-NEXT: lsr.w r1, r12, r1 +; V7A-T-NEXT: lsl.w r4, r0, r4 +; V7A-T-NEXT: orr.w r1, r1, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r1, r0, r3 +; V7A-T-NEXT: and.w r0, lr, r1 +; V7A-T-NEXT: mov r1, r2 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_a3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #4 +; V6M-NEXT: sub sp, #4 +; V6M-NEXT: str r1, [sp] @ 4-byte Spill +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: subs r4, r0, #1 +; V6M-NEXT: sbcs r5, r7 +; V6M-NEXT: ldm r6!, {r0, r1} +; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r4 +; 
V6M-NEXT: ands r1, r5 +; V6M-NEXT: add sp, #4 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %val = load i64, ptr %w + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_a4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: mov.w lr, #1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: rsb.w r4, r12, #32 +; V7M-NEXT: subs.w r3, r12, #32 +; V7M-NEXT: lsr.w r4, lr, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r4, lr, r3 +; V7M-NEXT: lsl.w r3, lr, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: subs r3, #1 +; V7M-NEXT: sbc r12, r4, #0 +; V7M-NEXT: rsb.w r4, r2, #32 +; V7M-NEXT: lsl.w r4, r1, r4 +; V7M-NEXT: orrs r0, r4 +; V7M-NEXT: subs.w r4, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r4 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: and.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: and.w r1, r1, r12 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_a4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr lr, [sp, #16] +; V7A-NEXT: mov r5, #1 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r12, lr, #32 +; V7A-NEXT: subs r4, lr, #32 +; V7A-NEXT: lsr r3, r5, r12 +; V7A-NEXT: lslpl r3, r5, r4 +; V7A-NEXT: lsl r5, r5, lr +; V7A-NEXT: movwpl r5, #0 +; V7A-NEXT: rsb r4, r2, #32 +; V7A-NEXT: subs r5, r5, #1 +; V7A-NEXT: sbc r3, r3, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r4 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r4 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: and r0, r0, r5 +; V7A-NEXT: and r1, r1, r3 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_a4_commutative: +; 
V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: mov.w lr, #1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: rsb.w r4, r12, #32 +; V7A-T-NEXT: subs.w r3, r12, #32 +; V7A-T-NEXT: lsr.w r4, lr, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r4, lr, r3 +; V7A-T-NEXT: lsl.w r3, lr, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: subs r3, #1 +; V7A-T-NEXT: sbc r12, r4, #0 +; V7A-T-NEXT: rsb.w r4, r2, #32 +; V7A-T-NEXT: lsl.w r4, r1, r4 +; V7A-T-NEXT: orrs r0, r4 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: and.w r1, r1, r12 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_a4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, r7, lr} +; V6M-NEXT: push {r4, r5, r6, r7, lr} +; V6M-NEXT: .pad #12 +; V6M-NEXT: sub sp, #12 +; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill +; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r7, #0 +; V6M-NEXT: ldr r2, [sp, #32] +; V6M-NEXT: mov r1, r7 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: subs r5, r0, #1 +; V6M-NEXT: sbcs r4, r7 +; V6M-NEXT: mov r0, r6 +; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: add sp, #12 +; V6M-NEXT: pop {r4, r5, r6, r7, pc} + %shifted = lshr i64 %val, %numskipbits + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %shifted, %mask ; swapped order + ret i64 %masked +} + +; 64-bit, but with 32-bit output + +; Everything done in 64-bit, truncation happens last. 
+define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_a0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsls r2, r1 +; V7M-NEXT: subs r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r1, r2, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_a0: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: mov r1, #1 +; V7A-NEXT: lsl r1, r1, r12 +; V7A-NEXT: subs r2, r12, #32 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: sub r1, r1, #1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_a0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsl.w r1, r1, r12 +; V7A-T-NEXT: subs.w r2, r12, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_a0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: ldr r2, [sp, #8] +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, pc} + %shifted = lshr i64 %val, %numskipbits + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %shifted + %res = trunc i64 %masked to i32 + ret 
i32 %res +} + +; Shifting happens in 64-bit, then truncation. Masking is 32-bit. +define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_a1: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_a1: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: add r12, r3, lr, lsl r12 +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: and r0, r12, r0 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_32_a1: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsl.w r1, r1, r12 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_a1: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r1, [sp, #8] +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: pop {r7, pc} + %shifted = lshr i64 %val, %numskipbits + %truncshifted = trunc i64 %shifted to i32 + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %truncshifted + ret i32 %masked +} + +; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit. 
+; Masking is 64-bit. Then truncation. +define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_a2: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_a2: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: add r12, r3, lr, lsl r12 +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: and r0, r12, r0 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_32_a2: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: lsl.w r1, r1, r12 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_a2: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r1, [sp, #8] +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: pop {r7, pc} + %shifted = lshr i64 %val, %numskipbits + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %zextmask = zext i32 %mask to i64 + %masked = and i64 %zextmask, %shifted + %truncmasked = trunc i64 %masked to i32 + ret i32 %truncmasked +} + +; ---------------------------------------------------------------------------- ; 
+; Pattern b. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_b0: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_b0: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_b0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_b0: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: mvns r1, r1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr32_b1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_b1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_b1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_b1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: mvns r1, r1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: bx lr + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %conv = zext i8 %numlowbits to 
i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_b2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_b2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_b2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_b2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #0 +; V6M-NEXT: mvns r3, r3 +; V6M-NEXT: lsls r3, r2 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bics r0, r3 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %shifted = lshr i32 %val, %numskipbits + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr32_b3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_b3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_b3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: 
bextr32_b3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #0 +; V6M-NEXT: mvns r3, r3 +; V6M-NEXT: lsls r3, r2 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bics r0, r3 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_b4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_b4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_b4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_b4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: mvns r1, r1 +; V6M-NEXT: lsls r1, r2 +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %shifted, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_b0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: 
movpl r1, #0 +; V7M-NEXT: lsl.w r3, r2, r12 +; V7M-NEXT: subs.w lr, r12, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r2, r2, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: bics r1, r2 +; V7M-NEXT: bics r0, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_b0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r3 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: subs lr, r12, #32 +; V7A-NEXT: lsl r2, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: lslpl r3, r3, lr +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_b0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, r5, r7, lr} +; V7A-T-NEXT: push {r4, r5, r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #16] +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r5, r0, r3 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: subs.w lr, r12, #32 +; V7A-T-NEXT: lsl.w r0, r3, r12 +; V7A-T-NEXT: itt pl +; V7A-T-NEXT: lslpl.w r3, r3, lr +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r5, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: bic.w r0, r5, r0 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: pop {r4, r5, r7, pc} +; +; V6M-LABEL: bextr64_b0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: ldr r2, [sp, #16] +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r4, r0 +; V6M-NEXT: bics r5, r1 +; V6M-NEXT: mov r0, r4 +; V6M-NEXT: mov r1, r5 +; V6M-NEXT: pop {r4, r5, 
r7, pc} + %shifted = lshr i64 %val, %numskipbits + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr64_b1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: lsr.w r12, r0, r2 +; V7M-NEXT: rsb.w r0, r2, #32 +; V7M-NEXT: lsl.w r0, r1, r0 +; V7M-NEXT: orr.w r12, r12, r0 +; V7M-NEXT: subs.w r0, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r12, r1, r0 +; V7M-NEXT: lsr.w r0, r1, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: subs.w r1, r3, #32 +; V7M-NEXT: lsl.w r3, r2, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r2, r1 +; V7M-NEXT: bic.w r1, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: bic.w r0, r12, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_b1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r12, r0, r2 +; V7A-NEXT: rsb r0, r2, #32 +; V7A-NEXT: orr r12, r12, r1, lsl r0 +; V7A-NEXT: subs r0, r2, #32 +; V7A-NEXT: lsrpl r12, r1, r0 +; V7A-NEXT: lsr r0, r1, r2 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: subs r1, r3, #32 +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: lsl r3, r2, r3 +; V7A-NEXT: lslpl r2, r2, r1 +; V7A-NEXT: bic r1, r0, r2 +; V7A-NEXT: movwpl r3, #0 +; V7A-NEXT: bic r0, r12, r3 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_b1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsr.w r12, r0, r2 +; V7A-T-NEXT: rsb.w r0, r2, #32 +; V7A-T-NEXT: lsl.w r0, r1, r0 +; V7A-T-NEXT: orr.w r12, r12, r0 +; V7A-T-NEXT: subs.w r0, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r12, r1, r0 +; V7A-T-NEXT: lsr.w r0, r1, r2 +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs.w r1, r3, #32 +; V7A-T-NEXT: lsl.w r3, r2, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r2, r1 +; V7A-T-NEXT: bic.w r1, r0, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: bic.w r0, r12, r3 +; V7A-T-NEXT: bx lr 
+; +; V6M-LABEL: bextr64_b1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r4, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: mov r6, r1 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r5, r0 +; V6M-NEXT: bics r6, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: pop {r4, r5, r6, pc} + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_b2_load: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsl.w r1, r3, r1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: orrs r0, r1 +; V7M-NEXT: subs.w r1, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r3, r1 +; V7M-NEXT: lsr.w r1, r3, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: lsl.w r3, r2, r12 +; V7M-NEXT: subs.w lr, r12, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r2, r2, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: bics r1, r2 +; V7M-NEXT: bics r0, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_b2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: ldrd r0, r1, [r0] +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r3 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: subs lr, r12, #32 +; V7A-NEXT: lsl r2, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; 
V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: lslpl r3, r3, lr +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_b2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: ldrd r0, r3, [r0] +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: lsl.w r1, r3, r1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: orrs r0, r1 +; V7A-T-NEXT: subs.w r1, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r3, r1 +; V7A-T-NEXT: lsr.w r1, r3, r2 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: lsl.w r2, r3, r12 +; V7A-T-NEXT: subs.w lr, r12, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r3, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: bics r0, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bextr64_b2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: ldr r2, [sp, #16] +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r4, r0 +; V6M-NEXT: bics r5, r1 +; V6M-NEXT: mov r0, r4 +; V6M-NEXT: mov r1, r5 +; V6M-NEXT: pop {r4, r5, r7, pc} + %val = load i64, ptr %w + %shifted = lshr i64 %val, %numskipbits + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bextr64_b3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: ldrd r12, r0, [r0] +; V7M-NEXT: rsb.w r3, r1, #32 +; V7M-NEXT: lsl.w lr, r0, r3 +; V7M-NEXT: lsr.w r3, r12, r1 +; V7M-NEXT: orr.w r12, r3, lr +; V7M-NEXT: subs.w 
r3, r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r12, r0, r3 +; V7M-NEXT: lsr.w r0, r0, r1 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: subs.w r1, r2, #32 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r3, r1 +; V7M-NEXT: bic.w r1, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: bic.w r0, r12, r2 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_b3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: ldm r0, {r0, r3} +; V7A-NEXT: lsr r12, r0, r1 +; V7A-NEXT: rsb r0, r1, #32 +; V7A-NEXT: orr r12, r12, r3, lsl r0 +; V7A-NEXT: subs r0, r1, #32 +; V7A-NEXT: lsrpl r12, r3, r0 +; V7A-NEXT: lsr r0, r3, r1 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: subs r1, r2, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lslpl r3, r3, r1 +; V7A-NEXT: bic r1, r0, r3 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r0, r12, r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_b3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: ldrd r12, r3, [r0] +; V7A-T-NEXT: rsb.w r0, r1, #32 +; V7A-T-NEXT: lsl.w lr, r3, r0 +; V7A-T-NEXT: lsr.w r0, r12, r1 +; V7A-T-NEXT: orr.w r12, r0, lr +; V7A-T-NEXT: subs.w r0, r1, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r12, r3, r0 +; V7A-T-NEXT: lsr.w r0, r3, r1 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs.w r1, r2, #32 +; V7A-T-NEXT: lsl.w r2, r3, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r3, r1 +; V7A-T-NEXT: bic.w r1, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: bic.w r0, r12, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bextr64_b3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r4, r2 +; V6M-NEXT: mov r2, r1 +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: mov 
r6, r1 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r5, r0 +; V6M-NEXT: bics r6, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: pop {r4, r5, r6, pc} + %val = load i64, ptr %w + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_b4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: lsl.w r3, r2, r12 +; V7M-NEXT: subs.w lr, r12, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r2, r2, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: bics r1, r2 +; V7M-NEXT: bics r0, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_b4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsrpl r0, r1, r3 +; V7A-NEXT: lsr r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: subs lr, r12, #32 +; V7A-NEXT: lsl r2, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: lslpl r3, r3, lr +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_b4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, r5, r7, lr} +; V7A-T-NEXT: push {r4, r5, r7, 
lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #16] +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r5, r0, r3 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: subs.w lr, r12, #32 +; V7A-T-NEXT: lsl.w r0, r3, r12 +; V7A-T-NEXT: itt pl +; V7A-T-NEXT: lslpl.w r3, r3, lr +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs.w r4, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r5, r1, r4 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: bic.w r0, r5, r0 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: pop {r4, r5, r7, pc} +; +; V6M-LABEL: bextr64_b4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: ldr r2, [sp, #16] +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r4, r0 +; V6M-NEXT: bics r5, r1 +; V6M-NEXT: mov r0, r4 +; V6M-NEXT: mov r1, r5 +; V6M-NEXT: pop {r4, r5, r7, pc} + %shifted = lshr i64 %val, %numskipbits + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %shifted, %mask ; swapped order + ret i64 %masked +} + +; 64-bit, but with 32-bit output + +; Everything done in 64-bit, truncation happens last. 
+define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_b0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldrb.w r1, [sp] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsls r2, r1 +; V7M-NEXT: subs r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: bics r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_b0: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldrb r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: lsl r1, r1, r12 +; V7A-NEXT: subs r2, r12, #32 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: bic r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_b0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsr.w r12, r0, r2 +; V7A-T-NEXT: rsb.w r0, r2, #32 +; V7A-T-NEXT: ldrb.w r3, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r0, r1, r0 +; V7A-T-NEXT: orr.w r0, r0, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: mov.w r1, #-1 +; V7A-T-NEXT: lsls r1, r3 +; V7A-T-NEXT: subs.w r2, r3, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_b0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: add r1, sp, #8 +; V6M-NEXT: ldrb r2, [r1] +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r4, r0 +; V6M-NEXT: mov r0, r4 +; V6M-NEXT: pop {r4, pc} + %shiftedval = lshr i64 %val, %numskipbits + %widenumlowbits = zext i8 %numlowbits to i64 + %notmask = shl nsw i64 -1, %widenumlowbits + %mask = xor i64 %notmask, -1 + %wideres = and i64 %shiftedval, %mask + %res = 
trunc i64 %wideres to i32 + ret i32 %res +} + +; Shifting happens in 64-bit, then truncation. Masking is 32-bit. +define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_b1: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldrb.w r1, [sp] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_b1: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldrb r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r12 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_b1: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldrb.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: mov.w r1, #-1 +; V7A-T-NEXT: lsl.w r1, r1, r12 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_b1: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: add r1, sp, #8 +; V6M-NEXT: ldrb r1, [r1] +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: pop {r7, pc} + %shiftedval = lshr i64 %val, %numskipbits + %truncshiftedval = trunc i64 %shiftedval to i32 + %widenumlowbits = zext i8 %numlowbits to i32 + %notmask = shl nsw i32 -1, %widenumlowbits + %mask = xor i32 %notmask, -1 + %res = and i32 %truncshiftedval, %mask + ret i32 %res +} + +; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit. +; Masking is 64-bit. Then truncation. 
+define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_b2: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldrb.w r1, [sp] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_b2: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldrb r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: bic r0, r0, r1, lsl r12 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_b2: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldrb.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: mov.w r1, #-1 +; V7A-T-NEXT: lsl.w r1, r1, r12 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_b2: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: add r1, sp, #8 +; V6M-NEXT: ldrb r1, [r1] +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: pop {r7, pc} + %shiftedval = lshr i64 %val, %numskipbits + %widenumlowbits = zext i8 %numlowbits to i32 + %notmask = shl nsw i32 -1, %widenumlowbits + %mask = xor i32 %notmask, -1 + %zextmask = zext i32 %mask to i64 + %wideres = and i64 %shiftedval, %zextmask + %res = trunc i64 %wideres to i32 + ret i32 %res +} + +; ---------------------------------------------------------------------------- ; +; Pattern c. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_c0: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_c0: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_c0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_c0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #32 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: lsrs r0, r2 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr32_c1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_c1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_c1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_c1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: 
movs r1, #32 +; V6M-NEXT: subs r1, r1, r2 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_c2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_c2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_c2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_c2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #32 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: lsrs r0, r2 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %shifted = lshr i32 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr32_c3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_c3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsr 
r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_c3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_c3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #32 +; V6M-NEXT: subs r1, r1, r2 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %shifted + ret i32 %masked +} + +define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_c4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_c4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_c4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_c4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #32 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: lsrs r0, r2 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %shifted, %mask ; 
swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_c0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: ldr.w r12, [sp] +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: rsb.w r3, r12, #64 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: lsr.w r3, r2, r3 +; V7M-NEXT: rsbs.w r12, r12, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r2, r2, r12 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_c0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr r12, [sp, #16] +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r5, r1, r2 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r4, r12, #64 +; V7A-NEXT: rsbs lr, r12, #32 +; V7A-NEXT: lsr r4, r3, r4 +; V7A-NEXT: lsrpl r3, r3, lr +; V7A-NEXT: movwpl r4, #0 +; V7A-NEXT: subs lr, r2, #32 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: movwpl r5, #0 +; V7A-NEXT: and r12, r4, r5 +; V7A-NEXT: orr r0, r0, r1, lsl r2 +; V7A-NEXT: lsrpl r0, r1, lr +; V7A-NEXT: mov r1, r12 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_c0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: mov.w lr, #-1 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orrs r0, r3 +; V7A-T-NEXT: subs.w r3, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r3 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: rsbs.w r2, r12, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r3, r2 +; V7A-T-NEXT: 
rsb.w r2, r12, #64 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: lsr.w r2, lr, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: ands r1, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bextr64_c0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: ldr r0, [sp, #16] +; V6M-NEXT: movs r1, #64 +; V6M-NEXT: subs r2, r1, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_c1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: uxtb r2, r2 +; V7M-NEXT: lsr.w r12, r0, r2 +; V7M-NEXT: rsb.w r0, r2, #32 +; V7M-NEXT: lsl.w r0, r1, r0 +; V7M-NEXT: orr.w r12, r12, r0 +; V7M-NEXT: subs.w r0, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r12, r1, r0 +; V7M-NEXT: rsb.w r0, r3, #64 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: uxtb r0, r0 +; V7M-NEXT: subs.w lr, r0, #32 +; V7M-NEXT: lsr.w r2, r3, r0 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r3, r3, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: and.w r0, r3, r12 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_c1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, lr} +; V7A-NEXT: push {r4, lr} +; V7A-NEXT: uxtb r12, r2 +; V7A-NEXT: lsr lr, r0, r12 +; V7A-NEXT: rsb r0, r12, #32 +; V7A-NEXT: orr r4, lr, r1, lsl r0 +; V7A-NEXT: mvn lr, #31 +; V7A-NEXT: uxtab r2, lr, r2 +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: lsrpl r4, r1, r2 
+; V7A-NEXT: rsb r2, r3, #64 +; V7A-NEXT: lsr r1, r1, r12 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: uxtb r12, r2 +; V7A-NEXT: uxtab r2, lr, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: lsr r0, r3, r12 +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: and r1, r0, r1 +; V7A-NEXT: lsrpl r3, r3, r2 +; V7A-NEXT: and r0, r3, r4 +; V7A-NEXT: pop {r4, pc} +; +; V7A-T-LABEL: bextr64_c1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: uxtb.w r12, r2 +; V7A-T-NEXT: lsr.w lr, r0, r12 +; V7A-T-NEXT: rsb.w r0, r12, #32 +; V7A-T-NEXT: lsl.w r0, r1, r0 +; V7A-T-NEXT: orr.w r4, lr, r0 +; V7A-T-NEXT: mvn lr, #31 +; V7A-T-NEXT: uxtab r2, lr, r2 +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r4, r1, r2 +; V7A-T-NEXT: rsb.w r2, r3, #64 +; V7A-T-NEXT: lsr.w r1, r1, r12 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: uxtb.w r12, r2 +; V7A-T-NEXT: uxtab r2, lr, r2 +; V7A-T-NEXT: lsr.w r0, r3, r12 +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: and.w r1, r1, r0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r3, r2 +; V7A-T-NEXT: and.w r0, r3, r4 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_c1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r3 +; V6M-NEXT: uxtb r2, r2 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r0, r0, r5 +; V6M-NEXT: uxtb r2, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r6 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %shifted + ret i64 %masked +} + 
+define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_c2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: ldr.w r12, [sp] +; V7M-NEXT: lsl.w r1, r3, r1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: orrs r0, r1 +; V7M-NEXT: subs.w r1, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r3, r1 +; V7M-NEXT: lsr.w r1, r3, r2 +; V7M-NEXT: rsb.w r3, r12, #64 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: rsbs.w r12, r12, #32 +; V7M-NEXT: lsr.w r3, r2, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r2, r2, r12 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_c2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r6, r8, lr} +; V7A-NEXT: push {r4, r6, r8, lr} +; V7A-NEXT: ldr r12, [sp, #16] +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: rsb r6, r12, #64 +; V7A-NEXT: ldr r8, [r0] +; V7A-NEXT: mvn r0, #0 +; V7A-NEXT: rsbs r1, r12, #32 +; V7A-NEXT: lsr r6, r0, r6 +; V7A-NEXT: lsr r4, r3, r2 +; V7A-NEXT: lsrpl r0, r0, r1 +; V7A-NEXT: movwpl r6, #0 +; V7A-NEXT: subs r12, r2, #32 +; V7A-NEXT: movwpl r4, #0 +; V7A-NEXT: and r1, r6, r4 +; V7A-NEXT: lsr r6, r8, r2 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: orr r2, r6, r3, lsl r2 +; V7A-NEXT: lsrpl r2, r3, r12 +; V7A-NEXT: and r0, r0, r2 +; V7A-NEXT: pop {r4, r6, r8, pc} +; +; V7A-T-LABEL: bextr64_c2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldrd r0, r3, [r0] +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: lsl.w r1, r3, r1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: orrs r0, r1 +; V7A-T-NEXT: subs.w r1, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r3, r1 +; V7A-T-NEXT: lsr.w r1, r3, r2 +; V7A-T-NEXT: rsb.w r2, r12, #64 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: rsbs.w r12, r12, #32 +; V7A-T-NEXT: lsr.w r2, r3, r2 +; V7A-T-NEXT: it pl +; 
V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r3, r3, r12 +; V7A-T-NEXT: ands r1, r2 +; V7A-T-NEXT: ands r0, r3 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_c2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: ldr r0, [sp, #16] +; V6M-NEXT: movs r1, #64 +; V6M-NEXT: subs r2, r1, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %val = load i64, ptr %w + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_c3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsr.w r12, r0, r1 +; V7M-NEXT: rsb.w r0, r1, #32 +; V7M-NEXT: lsl.w r0, r3, r0 +; V7M-NEXT: orr.w r12, r12, r0 +; V7M-NEXT: subs.w r0, r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r12, r3, r0 +; V7M-NEXT: rsb.w r0, r2, #64 +; V7M-NEXT: lsr.w r1, r3, r1 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: uxtb r0, r0 +; V7M-NEXT: subs.w lr, r0, #32 +; V7M-NEXT: lsr.w r2, r3, r0 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r3, r3, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: and.w r0, r3, r12 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bextr64_c3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, lr} +; V7A-NEXT: push {r4, lr} +; V7A-NEXT: ldr r4, [r0] +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: uxtb r0, r1 +; V7A-NEXT: lsr r12, r4, 
r0 +; V7A-NEXT: rsb r4, r0, #32 +; V7A-NEXT: lsr r0, r3, r0 +; V7A-NEXT: orr lr, r12, r3, lsl r4 +; V7A-NEXT: mvn r12, #31 +; V7A-NEXT: uxtab r1, r12, r1 +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: lsrpl lr, r3, r1 +; V7A-NEXT: rsb r1, r2, #64 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: uxtb r2, r1 +; V7A-NEXT: uxtab r4, r12, r1 +; V7A-NEXT: lsr r2, r3, r2 +; V7A-NEXT: cmp r4, #0 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: and r1, r2, r0 +; V7A-NEXT: lsrpl r3, r3, r4 +; V7A-NEXT: and r0, r3, lr +; V7A-NEXT: pop {r4, pc} +; +; V7A-T-LABEL: bextr64_c3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, r5, r7, lr} +; V7A-T-NEXT: push {r4, r5, r7, lr} +; V7A-T-NEXT: ldrd r12, lr, [r0] +; V7A-T-NEXT: uxtb r0, r1 +; V7A-T-NEXT: rsb.w r3, r0, #32 +; V7A-T-NEXT: lsl.w r4, lr, r3 +; V7A-T-NEXT: lsr.w r3, r12, r0 +; V7A-T-NEXT: orr.w r5, r3, r4 +; V7A-T-NEXT: mvn r12, #31 +; V7A-T-NEXT: uxtab r1, r12, r1 +; V7A-T-NEXT: lsr.w r0, lr, r0 +; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r5, lr, r1 +; V7A-T-NEXT: rsb.w r1, r2, #64 +; V7A-T-NEXT: mov.w r4, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: uxtb r2, r1 +; V7A-T-NEXT: uxtab r3, r12, r1 +; V7A-T-NEXT: lsr.w r2, r4, r2 +; V7A-T-NEXT: cmp r3, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: and.w r1, r2, r0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r4, r3 +; V7A-T-NEXT: and.w r0, r4, r5 +; V7A-T-NEXT: pop {r4, r5, r7, pc} +; +; V6M-LABEL: bextr64_c3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r2 +; V6M-NEXT: ldr r4, [r0] +; V6M-NEXT: ldr r3, [r0, #4] +; V6M-NEXT: uxtb r2, r1 +; V6M-NEXT: mov r0, r4 +; V6M-NEXT: mov r1, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r6, r0 +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r0, r0, r5 +; V6M-NEXT: uxtb r2, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl 
__aeabi_llsr +; V6M-NEXT: ands r0, r6 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %val = load i64, ptr %w + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %shifted + ret i64 %masked +} + +define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_c4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: ldr.w r12, [sp] +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: rsb.w r3, r12, #64 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: lsr.w r3, r2, r3 +; V7M-NEXT: rsbs.w r12, r12, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r2, r2, r12 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_c4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr r12, [sp, #16] +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r5, r1, r2 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r4, r12, #64 +; V7A-NEXT: rsbs lr, r12, #32 +; V7A-NEXT: lsr r4, r3, r4 +; V7A-NEXT: lsrpl r3, r3, lr +; V7A-NEXT: movwpl r4, #0 +; V7A-NEXT: subs lr, r2, #32 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: movwpl r5, #0 +; V7A-NEXT: and r12, r5, r4 +; V7A-NEXT: orr r0, r0, r1, lsl r2 +; V7A-NEXT: lsrpl r0, r1, lr +; V7A-NEXT: mov r1, r12 +; V7A-NEXT: and r0, r0, r3 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_c4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: mov.w lr, 
#-1 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orrs r0, r3 +; V7A-T-NEXT: subs.w r3, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r3 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: rsbs.w r2, r12, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r3, r2 +; V7A-T-NEXT: rsb.w r2, r12, #64 +; V7A-T-NEXT: and.w r0, r0, r3 +; V7A-T-NEXT: lsr.w r2, lr, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: ands r1, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bextr64_c4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: ldr r0, [sp, #16] +; V6M-NEXT: movs r1, #64 +; V6M-NEXT: subs r2, r1, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %shifted, %mask ; swapped order + ret i64 %masked +} + +; 64-bit, but with 32-bit output + +; Everything done in 64-bit, truncation happens last. 
+define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_c0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: rsbs.w r1, r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl r2, r1 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_c0: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r3, [sp] +; V7A-NEXT: rsbs r12, r3, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsrpl r3, r3, r12 +; V7A-NEXT: lsr r12, r0, r2 +; V7A-NEXT: rsb r0, r2, #32 +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r12, r1, lsl r0 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_c0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: rsbs.w r1, r12, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r2, r1 +; V7A-T-NEXT: ands r0, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_c0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: ldr r0, [sp, #8] +; V6M-NEXT: movs r1, #64 +; V6M-NEXT: subs r2, r1, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %shifted + %res = trunc i64 %masked to i32 + ret i32 %res +} + +; Shifting happens in 64-bit, then truncation. Masking is 32-bit. 
+define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_c1: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_c1: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: rsb r1, r12, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_c1: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: rsb.w r1, r12, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_c1: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r1, [sp, #8] +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: pop {r7, pc} + %shifted = lshr i64 %val, %numskipbits + %truncshifted = trunc i64 %shifted to i32 + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %truncshifted + ret i32 %masked +} + +; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit. +; Masking is 64-bit. Then truncation. 
+define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_c2: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_c2: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: rsb r1, r12, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_c2: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: rsb.w r1, r12, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_c2: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r1, [sp, #8] +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: pop {r7, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %zextmask = zext i32 %mask to i64 + %masked = and i64 %zextmask, %shifted + %truncmasked = trunc i64 %masked to i32 + ret i32 %truncmasked +} + +; ---------------------------------------------------------------------------- ; +; Pattern d. 32-bit. 
+; ---------------------------------------------------------------------------- ; + +define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_d0: +; V7M: @ %bb.0: +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_d0: +; V7A: @ %bb.0: +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_d0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_d0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #32 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: lsrs r0, r2 +; V6M-NEXT: bx lr + %shifted = lshr i32 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %shifted, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr32_d1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_d1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_d1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_d1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: 
lsrs r0, r1 +; V6M-NEXT: movs r1, #32 +; V6M-NEXT: subs r1, r1, r2 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %shifted, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr32_d2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_d2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_d2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_d2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r3, #32 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: lsls r0, r2 +; V6M-NEXT: lsrs r0, r2 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %shifted = lshr i32 %val, %numskipbits + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %shifted, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr32_d3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr32_d3_load_indexzext: +; 
V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: rsb r1, r2, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr32_d3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr32_d3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: movs r1, #32 +; V6M-NEXT: subs r1, r1, r2 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %skip = zext i8 %numskipbits to i32 + %shifted = lshr i32 %val, %skip + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %shifted, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +; 64-bit. 
+ +define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_d0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: rsb.w r3, r12, #64 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: rsb.w lr, r12, #32 +; V7M-NEXT: rsb.w r12, r3, #32 +; V7M-NEXT: lsls r1, r3 +; V7M-NEXT: cmp.w lr, #0 +; V7M-NEXT: lsr.w r4, r0, r12 +; V7M-NEXT: orr.w r1, r1, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, lr +; V7M-NEXT: lsl.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r2, r1, r12 +; V7M-NEXT: lsr.w r0, r0, r3 +; V7M-NEXT: orr.w r0, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, lr +; V7M-NEXT: lsr.w r1, r1, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_d0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: lsr r3, r1, r2 +; V7A-NEXT: subs lr, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: movwpl r3, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r2 +; V7A-NEXT: lsrpl r0, r1, lr +; V7A-NEXT: rsb r1, r12, #64 +; V7A-NEXT: rsb lr, r1, #32 +; V7A-NEXT: lsr r2, r0, lr +; V7A-NEXT: orr r2, r2, r3, lsl r1 +; V7A-NEXT: rsbs r3, r12, #32 +; V7A-NEXT: lslpl r2, r0, r3 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: lsr r1, r2, r1 +; V7A-NEXT: orr r0, r0, r2, lsl lr +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: lsrpl r0, r2, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_d0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; 
V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orrs r0, r3 +; V7A-T-NEXT: subs.w r3, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r3 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: rsb.w r3, r12, #64 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: lsls r1, r3 +; V7A-T-NEXT: rsbs.w r2, r12, #32 +; V7A-T-NEXT: lsr.w r4, r0, lr +; V7A-T-NEXT: orr.w r1, r1, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r1, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r4, r1, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: lsr.w r1, r1, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_d0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r2, [sp, #8] +; V6M-NEXT: movs r3, #64 +; V6M-NEXT: subs r4, r3, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %shifted, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_d1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: uxtb.w lr, r2 +; V7M-NEXT: subs.w r2, lr, #32 +; V7M-NEXT: lsr.w r12, r0, lr +; V7M-NEXT: rsb.w r0, lr, #32 +; V7M-NEXT: lsl.w r0, r1, r0 +; V7M-NEXT: orr.w r0, r0, r12 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: rsb.w r2, r3, #64 +; V7M-NEXT: lsr.w r1, r1, lr +; V7M-NEXT: uxtb r2, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: rsb.w r3, r2, #32 +; 
V7M-NEXT: lsls r1, r2 +; V7M-NEXT: sub.w r12, r2, #32 +; V7M-NEXT: lsr.w r4, r0, r3 +; V7M-NEXT: orrs r1, r4 +; V7M-NEXT: cmp.w r12, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, r12 +; V7M-NEXT: lsl.w r0, r0, r2 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsr.w r0, r0, r2 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r12 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_d1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: uxtb r12, r2 +; V7A-NEXT: lsr lr, r0, r12 +; V7A-NEXT: rsb r0, r12, #32 +; V7A-NEXT: orr r0, lr, r1, lsl r0 +; V7A-NEXT: mvn lr, #31 +; V7A-NEXT: uxtab r2, lr, r2 +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: rsb r2, r3, #64 +; V7A-NEXT: lsr r1, r1, r12 +; V7A-NEXT: uxtb r3, r2 +; V7A-NEXT: rsb r4, r3, #32 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: uxtab r2, lr, r2 +; V7A-NEXT: lsr r5, r0, r4 +; V7A-NEXT: orr r1, r5, r1, lsl r3 +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: lslpl r1, r0, r2 +; V7A-NEXT: lsl r0, r0, r3 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r3 +; V7A-NEXT: orr r0, r0, r1, lsl r4 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: lsr r1, r1, r3 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_d1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, r5, r6, r7, lr} +; V7A-T-NEXT: push {r4, r5, r6, r7, lr} +; V7A-T-NEXT: uxtb.w r12, r2 +; V7A-T-NEXT: rsb.w r6, r12, #32 +; V7A-T-NEXT: rsb.w r3, r3, #64 +; V7A-T-NEXT: lsr.w r0, r0, r12 +; V7A-T-NEXT: mvn r7, #31 +; V7A-T-NEXT: uxtab r2, r7, r2 +; V7A-T-NEXT: lsl.w r6, r1, r6 +; V7A-T-NEXT: lsr.w lr, r1, r12 +; V7A-T-NEXT: orrs r0, r6 +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl.w lr, #0 +; V7A-T-NEXT: uxtb r5, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: rsb.w r1, 
r5, #32 +; V7A-T-NEXT: uxtab r3, r7, r3 +; V7A-T-NEXT: lsl.w r4, lr, r5 +; V7A-T-NEXT: lsr.w r2, r0, r1 +; V7A-T-NEXT: cmp r3, #0 +; V7A-T-NEXT: orr.w r2, r2, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r2, r0, r3 +; V7A-T-NEXT: lsl.w r0, r0, r5 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: lsr.w r0, r0, r5 +; V7A-T-NEXT: orr.w r0, r0, r1 +; V7A-T-NEXT: lsr.w r1, r2, r5 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r2, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, r5, r6, r7, pc} +; +; V6M-LABEL: bextr64_d1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: mov r4, r3 +; V6M-NEXT: uxtb r2, r2 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: movs r2, #64 +; V6M-NEXT: subs r2, r2, r4 +; V6M-NEXT: uxtb r4, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %shifted, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_d2_load: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsl.w r1, r3, r1 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: rsb.w lr, r12, #32 +; V7M-NEXT: orrs r0, r1 +; V7M-NEXT: subs.w r1, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r3, r1 +; V7M-NEXT: rsb.w r1, r12, #64 +; V7M-NEXT: lsr.w r2, r3, r2 +; V7M-NEXT: rsb.w r12, r1, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: cmp.w lr, #0 +; V7M-NEXT: lsl.w r2, r2, r1 +; V7M-NEXT: lsr.w r4, r0, r12 +; V7M-NEXT: orr.w r2, r2, r4 +; V7M-NEXT: it pl +; 
V7M-NEXT: lslpl.w r2, r0, lr +; V7M-NEXT: lsl.w r0, r0, r1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r3, r2, r12 +; V7M-NEXT: lsr.w r0, r0, r1 +; V7M-NEXT: lsr.w r1, r2, r1 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r2, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_d2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: ldrd r0, r1, [r0] +; V7A-NEXT: subs lr, r2, #32 +; V7A-NEXT: lsr r3, r1, r2 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: movwpl r3, #0 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r2 +; V7A-NEXT: lsrpl r0, r1, lr +; V7A-NEXT: rsb r1, r12, #64 +; V7A-NEXT: rsb lr, r1, #32 +; V7A-NEXT: lsr r2, r0, lr +; V7A-NEXT: orr r2, r2, r3, lsl r1 +; V7A-NEXT: rsbs r3, r12, #32 +; V7A-NEXT: lslpl r2, r0, r3 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: lsr r1, r2, r1 +; V7A-NEXT: orr r0, r0, r2, lsl lr +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: lsrpl r0, r2, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_d2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: ldrd r0, r3, [r0] +; V7A-T-NEXT: rsb.w r1, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: lsl.w r1, r3, r1 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: orrs r0, r1 +; V7A-T-NEXT: subs.w r1, r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r3, r1 +; V7A-T-NEXT: lsr.w r2, r3, r2 +; V7A-T-NEXT: rsb.w r1, r12, #64 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: rsb.w lr, r1, #32 +; V7A-T-NEXT: rsbs.w r3, r12, #32 +; V7A-T-NEXT: lsl.w r2, r2, r1 +; V7A-T-NEXT: lsr.w r4, r0, lr +; V7A-T-NEXT: orr.w r2, r2, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r2, r0, r3 +; V7A-T-NEXT: lsl.w r0, r0, r1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r4, r2, lr +; V7A-T-NEXT: lsr.w 
r0, r0, r1 +; V7A-T-NEXT: lsr.w r1, r2, r1 +; V7A-T-NEXT: orr.w r0, r0, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r2, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_d2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: ldr r3, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r2, [sp, #8] +; V6M-NEXT: movs r3, #64 +; V6M-NEXT: subs r4, r3, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %shifted, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { +; V7M-LABEL: bextr64_d3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: ldrd r0, lr, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: subs.w r3, r1, #32 +; V7M-NEXT: lsr.w r12, r0, r1 +; V7M-NEXT: rsb.w r0, r1, #32 +; V7M-NEXT: lsr.w r1, lr, r1 +; V7M-NEXT: uxtb r2, r2 +; V7M-NEXT: lsl.w r0, lr, r0 +; V7M-NEXT: orr.w r0, r0, r12 +; V7M-NEXT: sub.w r12, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, lr, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsls r1, r2 +; V7M-NEXT: cmp.w r12, #0 +; V7M-NEXT: lsr.w r4, r0, r3 +; V7M-NEXT: orr.w r1, r1, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, r12 +; V7M-NEXT: lsl.w r0, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: lsr.w r0, r0, r2 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r12 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; 
V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_d3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r5, r11, lr} +; V7A-NEXT: push {r4, r5, r11, lr} +; V7A-NEXT: ldr r4, [r0] +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: uxtb r0, r1 +; V7A-NEXT: lsr r12, r4, r0 +; V7A-NEXT: rsb r4, r0, #32 +; V7A-NEXT: lsr r0, r3, r0 +; V7A-NEXT: orr r4, r12, r3, lsl r4 +; V7A-NEXT: mvn r12, #31 +; V7A-NEXT: uxtab r1, r12, r1 +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: lsrpl r4, r3, r1 +; V7A-NEXT: rsb r1, r2, #64 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: uxtb r2, r1 +; V7A-NEXT: rsb lr, r2, #32 +; V7A-NEXT: uxtab r1, r12, r1 +; V7A-NEXT: lsr r5, r4, lr +; V7A-NEXT: orr r3, r5, r0, lsl r2 +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: lsl r0, r4, r2 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lslpl r3, r4, r1 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: orr r0, r0, r3, lsl lr +; V7A-NEXT: lsrpl r0, r3, r1 +; V7A-NEXT: lsr r1, r3, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r4, r5, r11, pc} +; +; V7A-T-LABEL: bextr64_d3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, r5, r6, lr} +; V7A-T-NEXT: push {r4, r5, r6, lr} +; V7A-T-NEXT: ldrd r12, lr, [r0] +; V7A-T-NEXT: uxtb r0, r1 +; V7A-T-NEXT: rsb.w r6, r0, #32 +; V7A-T-NEXT: lsr.w r3, lr, r0 +; V7A-T-NEXT: rsb.w r2, r2, #64 +; V7A-T-NEXT: mvn r4, #31 +; V7A-T-NEXT: lsr.w r0, r12, r0 +; V7A-T-NEXT: uxtab r1, r4, r1 +; V7A-T-NEXT: lsl.w r6, lr, r6 +; V7A-T-NEXT: orrs r0, r6 +; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: uxtb r5, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, lr, r1 +; V7A-T-NEXT: rsb.w r1, r5, #32 +; V7A-T-NEXT: lsls r3, r5 +; V7A-T-NEXT: uxtab r2, r4, r2 +; V7A-T-NEXT: lsr.w r6, r0, r1 +; V7A-T-NEXT: orrs r3, r6 +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r5 +; V7A-T-NEXT: lsl.w r1, r3, r1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsr.w r0, r0, r5 +; V7A-T-NEXT: orr.w r0, r0, r1 +; 
V7A-T-NEXT: lsr.w r1, r3, r5 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r3, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, r5, r6, pc} +; +; V6M-LABEL: bextr64_d3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r2 +; V6M-NEXT: ldr r5, [r0] +; V6M-NEXT: ldr r3, [r0, #4] +; V6M-NEXT: uxtb r2, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r3 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: movs r2, #64 +; V6M-NEXT: subs r2, r2, r4 +; V6M-NEXT: uxtb r4, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, r5, r7, pc} + %val = load i64, ptr %w + %skip = zext i8 %numskipbits to i64 + %shifted = lshr i64 %val, %skip + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %shifted, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +; 64-bit, but with 32-bit output + +; Everything done in 64-bit, truncation happens last. 
+define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_d0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r4, lr} +; V7M-NEXT: push {r4, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: ldr.w r12, [sp, #8] +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orrs r0, r3 +; V7M-NEXT: subs.w r3, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r3 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: rsb.w r3, r12, #64 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: rsb.w lr, r12, #32 +; V7M-NEXT: rsb.w r12, r3, #32 +; V7M-NEXT: lsls r1, r3 +; V7M-NEXT: cmp.w lr, #0 +; V7M-NEXT: lsr.w r4, r0, r12 +; V7M-NEXT: orr.w r1, r1, r4 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, lr +; V7M-NEXT: lsl.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r2, r1, r12 +; V7M-NEXT: lsr.w r0, r0, r3 +; V7M-NEXT: orr.w r0, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, lr +; V7M-NEXT: pop {r4, pc} +; +; V7A-LABEL: bextr64_32_d0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: lsr r3, r1, r2 +; V7A-NEXT: subs lr, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: rsb r2, r2, #32 +; V7A-NEXT: ldr r12, [sp, #8] +; V7A-NEXT: movwpl r3, #0 +; V7A-NEXT: orr r0, r0, r1, lsl r2 +; V7A-NEXT: lsrpl r0, r1, lr +; V7A-NEXT: rsb r1, r12, #64 +; V7A-NEXT: rsb lr, r1, #32 +; V7A-NEXT: lsr r2, r0, lr +; V7A-NEXT: orr r2, r2, r3, lsl r1 +; V7A-NEXT: rsbs r3, r12, #32 +; V7A-NEXT: lslpl r2, r0, r3 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: orr r0, r0, r2, lsl lr +; V7A-NEXT: lsrpl r0, r2, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bextr64_32_d0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: ldr.w r12, [sp, #8] +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orrs r0, r3 +; V7A-T-NEXT: subs.w r3, r2, #32 +; 
V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r3 +; V7A-T-NEXT: lsr.w r1, r1, r2 +; V7A-T-NEXT: rsb.w r3, r12, #64 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: lsls r1, r3 +; V7A-T-NEXT: rsbs.w r2, r12, #32 +; V7A-T-NEXT: lsr.w r4, r0, lr +; V7A-T-NEXT: orr.w r1, r1, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r1, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r4, r1, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bextr64_32_d0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r2, [sp, #8] +; V6M-NEXT: movs r3, #64 +; V6M-NEXT: subs r4, r3, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %shifted = lshr i64 %val, %numskipbits + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %shifted, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + %res = trunc i64 %masked to i32 + ret i32 %res +} + +; Shifting happens in 64-bit, then truncation. Masking is 32-bit. 
+define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { +; V7M-LABEL: bextr64_32_d1: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsrs r0, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: ldr r1, [sp] +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bextr64_32_d1: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r3, r2, #32 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: ldr r12, [sp] +; V7A-NEXT: subs r2, r2, #32 +; V7A-NEXT: orr r0, r0, r1, lsl r3 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: rsb r1, r12, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bextr64_32_d1: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: lsrs r0, r2 +; V7A-T-NEXT: ldr.w r12, [sp] +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: lsl.w r3, r1, r3 +; V7A-T-NEXT: orr.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: rsb.w r1, r12, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bextr64_32_d1: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r7, lr} +; V6M-NEXT: push {r7, lr} +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldr r1, [sp, #8] +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: pop {r7, pc} + %shifted = lshr i64 %val, %numskipbits + %truncshifted = trunc i64 %shifted to i32 + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %truncshifted, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +; ---------------------------------------------------------------------------- ; +; Constant +; ---------------------------------------------------------------------------- ; + +; https://bugs.llvm.org/show_bug.cgi?id=38938 +define void @pr38938(ptr %a0, ptr %a1) 
nounwind { +; V7M-LABEL: pr38938: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r1, [r1] +; V7M-NEXT: ubfx r1, r1, #21, #10 +; V7M-NEXT: ldr.w r2, [r0, r1, lsl #2] +; V7M-NEXT: adds r2, #1 +; V7M-NEXT: str.w r2, [r0, r1, lsl #2] +; V7M-NEXT: bx lr +; +; V7A-LABEL: pr38938: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r1, [r1] +; V7A-NEXT: ubfx r1, r1, #21, #10 +; V7A-NEXT: ldr r2, [r0, r1, lsl #2] +; V7A-NEXT: add r2, r2, #1 +; V7A-NEXT: str r2, [r0, r1, lsl #2] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: pr38938: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r1, [r1] +; V7A-T-NEXT: ubfx r1, r1, #21, #10 +; V7A-T-NEXT: ldr.w r2, [r0, r1, lsl #2] +; V7A-T-NEXT: adds r2, #1 +; V7A-T-NEXT: str.w r2, [r0, r1, lsl #2] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: pr38938: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, [r1] +; V6M-NEXT: lsrs r1, r1, #19 +; V6M-NEXT: ldr r2, .LCPI51_0 +; V6M-NEXT: ands r2, r1 +; V6M-NEXT: ldr r1, [r0, r2] +; V6M-NEXT: adds r1, r1, #1 +; V6M-NEXT: str r1, [r0, r2] +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI51_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp = load i64, ptr %a1, align 8 + %tmp1 = lshr i64 %tmp, 21 + %tmp2 = and i64 %tmp1, 1023 + %tmp3 = getelementptr inbounds i32, ptr %a0, i64 %tmp2 + %tmp4 = load i32, ptr %tmp3, align 4 + %tmp5 = add nsw i32 %tmp4, 1 + store i32 %tmp5, ptr %tmp3, align 4 + ret void +} + +; The most canonical variant +define i32 @c0_i32(i32 %arg) nounwind { +; V7M-LABEL: c0_i32: +; V7M: @ %bb.0: +; V7M-NEXT: ubfx r0, r0, #19, #10 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c0_i32: +; V7A: @ %bb.0: +; V7A-NEXT: ubfx r0, r0, #19, #10 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c0_i32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ubfx r0, r0, #19, #10 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c0_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsls r0, r0, #3 +; V6M-NEXT: lsrs r0, r0, #22 +; V6M-NEXT: bx lr + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 + ret i32 %tmp1 +} + +; Should be still fine, but the mask is shifted +define i32 @c1_i32(i32 %arg) nounwind { 
+; V7M-LABEL: c1_i32: +; V7M: @ %bb.0: +; V7M-NEXT: movw r1, #4092 +; V7M-NEXT: and.w r0, r1, r0, lsr #19 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c1_i32: +; V7A: @ %bb.0: +; V7A-NEXT: movw r1, #4092 +; V7A-NEXT: and r0, r1, r0, lsr #19 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c1_i32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movw r1, #4092 +; V7A-T-NEXT: and.w r0, r1, r0, lsr #19 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c1_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r1, r0, #19 +; V6M-NEXT: ldr r0, .LCPI53_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI53_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 4092 + ret i32 %tmp1 +} + +; Should be still fine, but the result is shifted left afterwards +define i32 @c2_i32(i32 %arg) nounwind { +; V7M-LABEL: c2_i32: +; V7M: @ %bb.0: +; V7M-NEXT: movw r1, #4092 +; V7M-NEXT: and.w r0, r1, r0, lsr #17 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c2_i32: +; V7A: @ %bb.0: +; V7A-NEXT: movw r1, #4092 +; V7A-NEXT: and r0, r1, r0, lsr #17 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c2_i32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movw r1, #4092 +; V7A-T-NEXT: and.w r0, r1, r0, lsr #17 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c2_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r1, r0, #17 +; V6M-NEXT: ldr r0, .LCPI54_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI54_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 + %tmp2 = shl i32 %tmp1, 2 + ret i32 %tmp2 +} + +; The mask covers newly shifted-in bit +define i32 @c4_i32_bad(i32 %arg) nounwind { +; V7M-LABEL: c4_i32_bad: +; V7M: @ %bb.0: +; V7M-NEXT: mvn r1, #1 +; V7M-NEXT: and.w r0, r1, r0, lsr #19 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c4_i32_bad: +; V7A: @ %bb.0: +; V7A-NEXT: mvn r1, #1 +; V7A-NEXT: and r0, r1, r0, lsr #19 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c4_i32_bad: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mvn r1, #1 +; V7A-T-NEXT: 
and.w r0, r1, r0, lsr #19 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c4_i32_bad: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r0, #20 +; V6M-NEXT: lsls r0, r0, #1 +; V6M-NEXT: bx lr + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 16382 + ret i32 %tmp1 +} + +; i64 + +; The most canonical variant +define i64 @c0_i64(i64 %arg) nounwind { +; V7M-LABEL: c0_i64: +; V7M: @ %bb.0: +; V7M-NEXT: ubfx r0, r1, #19, #10 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c0_i64: +; V7A: @ %bb.0: +; V7A-NEXT: ubfx r0, r1, #19, #10 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c0_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ubfx r0, r1, #19, #10 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c0_i64: +; V6M: @ %bb.0: +; V6M-NEXT: lsls r0, r1, #3 +; V6M-NEXT: lsrs r0, r0, #22 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 + ret i64 %tmp1 +} + +; Should be still fine, but the mask is shifted +define i64 @c1_i64(i64 %arg) nounwind { +; V7M-LABEL: c1_i64: +; V7M: @ %bb.0: +; V7M-NEXT: movw r0, #4092 +; V7M-NEXT: and.w r0, r0, r1, lsr #19 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c1_i64: +; V7A: @ %bb.0: +; V7A-NEXT: movw r0, #4092 +; V7A-NEXT: and r0, r0, r1, lsr #19 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c1_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movw r0, #4092 +; V7A-T-NEXT: and.w r0, r0, r1, lsr #19 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c1_i64: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r1, r1, #19 +; V6M-NEXT: ldr r0, .LCPI57_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI57_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 4092 + ret i64 %tmp1 +} + +; Should be still fine, but the result is shifted left afterwards +define i64 @c2_i64(i64 %arg) nounwind { +; V7M-LABEL: c2_i64: +; V7M: @ %bb.0: +; V7M-NEXT: movw 
r0, #4092 +; V7M-NEXT: and.w r0, r0, r1, lsr #17 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c2_i64: +; V7A: @ %bb.0: +; V7A-NEXT: movw r0, #4092 +; V7A-NEXT: and r0, r0, r1, lsr #17 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c2_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movw r0, #4092 +; V7A-T-NEXT: and.w r0, r0, r1, lsr #17 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c2_i64: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r1, r1, #17 +; V6M-NEXT: ldr r0, .LCPI58_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI58_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 + %tmp2 = shl i64 %tmp1, 2 + ret i64 %tmp2 +} + +; The mask covers newly shifted-in bit +define i64 @c4_i64_bad(i64 %arg) nounwind { +; V7M-LABEL: c4_i64_bad: +; V7M: @ %bb.0: +; V7M-NEXT: mvn r0, #1 +; V7M-NEXT: and.w r0, r0, r1, lsr #19 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: c4_i64_bad: +; V7A: @ %bb.0: +; V7A-NEXT: mvn r0, #1 +; V7A-NEXT: and r0, r0, r1, lsr #19 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c4_i64_bad: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mvn r0, #1 +; V7A-T-NEXT: and.w r0, r0, r1, lsr #19 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c4_i64_bad: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r1, #20 +; V6M-NEXT: lsls r0, r0, #1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 16382 + ret i64 %tmp1 +} + +; ---------------------------------------------------------------------------- ; +; Constant, storing the result afterwards. 
+; ---------------------------------------------------------------------------- ; + +; i32 + +; The most canonical variant +define void @c5_i32(i32 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c5_i32: +; V7M: @ %bb.0: +; V7M-NEXT: ubfx r0, r0, #19, #10 +; V7M-NEXT: str r0, [r1] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c5_i32: +; V7A: @ %bb.0: +; V7A-NEXT: ubfx r0, r0, #19, #10 +; V7A-NEXT: str r0, [r1] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c5_i32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ubfx r0, r0, #19, #10 +; V7A-T-NEXT: str r0, [r1] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c5_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsls r0, r0, #3 +; V6M-NEXT: lsrs r0, r0, #22 +; V6M-NEXT: str r0, [r1] +; V6M-NEXT: bx lr + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 + store i32 %tmp1, ptr %ptr + ret void +} + +; Should be still fine, but the mask is shifted +define void @c6_i32(i32 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c6_i32: +; V7M: @ %bb.0: +; V7M-NEXT: ubfx r0, r0, #19, #12 +; V7M-NEXT: str r0, [r1] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c6_i32: +; V7A: @ %bb.0: +; V7A-NEXT: ubfx r0, r0, #19, #12 +; V7A-NEXT: str r0, [r1] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c6_i32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ubfx r0, r0, #19, #12 +; V7A-T-NEXT: str r0, [r1] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c6_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsls r0, r0, #1 +; V6M-NEXT: lsrs r0, r0, #20 +; V6M-NEXT: str r0, [r1] +; V6M-NEXT: bx lr + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 4095 + store i32 %tmp1, ptr %ptr + ret void +} + +; Should be still fine, but the result is shifted left afterwards +define void @c7_i32(i32 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c7_i32: +; V7M: @ %bb.0: +; V7M-NEXT: movw r2, #4092 +; V7M-NEXT: and.w r0, r2, r0, lsr #17 +; V7M-NEXT: str r0, [r1] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c7_i32: +; V7A: @ %bb.0: +; V7A-NEXT: movw r2, #4092 +; V7A-NEXT: and r0, r2, r0, lsr #17 +; V7A-NEXT: str r0, [r1] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c7_i32: +; V7A-T: @ %bb.0: +; 
V7A-T-NEXT: movw r2, #4092 +; V7A-T-NEXT: and.w r0, r2, r0, lsr #17 +; V7A-T-NEXT: str r0, [r1] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c7_i32: +; V6M: @ %bb.0: +; V6M-NEXT: lsrs r0, r0, #17 +; V6M-NEXT: ldr r2, .LCPI62_0 +; V6M-NEXT: ands r2, r0 +; V6M-NEXT: str r2, [r1] +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI62_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 + %tmp2 = shl i32 %tmp1, 2 + store i32 %tmp2, ptr %ptr + ret void +} + +; i64 + +; The most canonical variant +define void @c5_i64(i64 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c5_i64: +; V7M: @ %bb.0: +; V7M-NEXT: movs r0, #0 +; V7M-NEXT: ubfx r1, r1, #19, #10 +; V7M-NEXT: strd r1, r0, [r2] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c5_i64: +; V7A: @ %bb.0: +; V7A-NEXT: mov r0, #0 +; V7A-NEXT: str r0, [r2, #4] +; V7A-NEXT: ubfx r0, r1, #19, #10 +; V7A-NEXT: str r0, [r2] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c5_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r0, #0 +; V7A-T-NEXT: ubfx r1, r1, #19, #10 +; V7A-T-NEXT: strd r1, r0, [r2] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c5_i64: +; V6M: @ %bb.0: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: lsls r1, r1, #3 +; V6M-NEXT: lsrs r1, r1, #22 +; V6M-NEXT: str r1, [r2] +; V6M-NEXT: str r0, [r2, #4] +; V6M-NEXT: bx lr + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 + store i64 %tmp1, ptr %ptr + ret void +} + +; Should be still fine, but the mask is shifted +define void @c6_i64(i64 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c6_i64: +; V7M: @ %bb.0: +; V7M-NEXT: movs r0, #0 +; V7M-NEXT: ubfx r1, r1, #19, #12 +; V7M-NEXT: strd r1, r0, [r2] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c6_i64: +; V7A: @ %bb.0: +; V7A-NEXT: mov r0, #0 +; V7A-NEXT: str r0, [r2, #4] +; V7A-NEXT: ubfx r0, r1, #19, #12 +; V7A-NEXT: str r0, [r2] +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c6_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r0, #0 +; V7A-T-NEXT: ubfx r1, r1, #19, #12 +; V7A-T-NEXT: strd r1, r0, [r2] +; V7A-T-NEXT: bx lr +; +; 
V6M-LABEL: c6_i64: +; V6M: @ %bb.0: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: lsls r1, r1, #1 +; V6M-NEXT: lsrs r1, r1, #20 +; V6M-NEXT: str r1, [r2] +; V6M-NEXT: str r0, [r2, #4] +; V6M-NEXT: bx lr + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 4095 + store i64 %tmp1, ptr %ptr + ret void +} + +; Should be still fine, but the result is shifted left afterwards +define void @c7_i64(i64 %arg, ptr %ptr) nounwind { +; V7M-LABEL: c7_i64: +; V7M: @ %bb.0: +; V7M-NEXT: movs r0, #0 +; V7M-NEXT: movw r3, #4092 +; V7M-NEXT: and.w r1, r3, r1, lsr #17 +; V7M-NEXT: strd r1, r0, [r2] +; V7M-NEXT: bx lr +; +; V7A-LABEL: c7_i64: +; V7A: @ %bb.0: +; V7A-NEXT: movw r0, #4092 +; V7A-NEXT: mov r3, #0 +; V7A-NEXT: and r0, r0, r1, lsr #17 +; V7A-NEXT: stm r2, {r0, r3} +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: c7_i64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r0, #0 +; V7A-T-NEXT: movw r3, #4092 +; V7A-T-NEXT: and.w r1, r3, r1, lsr #17 +; V7A-T-NEXT: strd r1, r0, [r2] +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: c7_i64: +; V6M: @ %bb.0: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: lsrs r1, r1, #17 +; V6M-NEXT: ldr r3, .LCPI65_0 +; V6M-NEXT: ands r3, r1 +; V6M-NEXT: str r3, [r2] +; V6M-NEXT: str r0, [r2, #4] +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI65_0: +; V6M-NEXT: .long 4092 @ 0xffc + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 + %tmp2 = shl i64 %tmp1, 2 + store i64 %tmp2, ptr %ptr + ret void +} diff --git a/llvm/test/CodeGen/ARM/extract-lowbits.ll b/llvm/test/CodeGen/ARM/extract-lowbits.ll new file mode 100644 index 000000000000..373d998a0eeb --- /dev/null +++ b/llvm/test/CodeGen/ARM/extract-lowbits.ll @@ -0,0 +1,2752 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M +; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A +; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T +; RUN: llc 
-mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M + +; Patterns: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) +; are equivalent. + +; ---------------------------------------------------------------------------- ; +; Pattern a. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_a0: +; V7M: @ %bb.0: +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_a0: +; V7A: @ %bb.0: +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r1, r3, r2, lsl r1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_a0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_a0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi32_a1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_a1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r1, r3, r2, lsl r1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_a1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_a1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #1 +; 
V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_a2_load: +; V7M: @ %bb.0: +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_a2_load: +; V7A: @ %bb.0: +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r1, r3, r2, lsl r1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_a2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_a2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi32_a3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_a3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r1, r3, r2, lsl r1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_a3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; 
V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_a3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_a4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: movs r2, #1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: subs r1, #1 +; V7M-NEXT: ands r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_a4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: add r1, r3, r2, lsl r1 +; V7A-NEXT: and r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_a4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: subs r1, #1 +; V7A-T-NEXT: ands r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_a4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #1 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: subs r1, r2, #1 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: mov.w r12, #1 +; V7M-NEXT: subs.w lr, r2, #32 +; V7M-NEXT: lsl.w r2, r12, r2 +; V7M-NEXT: lsr.w r3, r12, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r12, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r2, #1 +; V7M-NEXT: sbc r3, r3, #0 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_a0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: 
push {r11, lr} +; V7A-NEXT: rsb r12, r2, #32 +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsl r2, lr, r2 +; V7A-NEXT: lsr r12, lr, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: lslpl r12, lr, r3 +; V7A-NEXT: subs r2, r2, #1 +; V7A-NEXT: sbc r3, r12, #0 +; V7A-NEXT: and r0, r2, r0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_a0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: mov.w r12, #1 +; V7A-T-NEXT: subs.w lr, r2, #32 +; V7A-T-NEXT: lsl.w r2, r12, r2 +; V7A-T-NEXT: lsr.w r3, r12, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r12, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: subs r2, #1 +; V7A-T-NEXT: sbc r3, r3, #0 +; V7A-T-NEXT: ands r0, r2 +; V7A-T-NEXT: ands r1, r3 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r6, #0 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: sbcs r1, r6 +; V6M-NEXT: ands r1, r5 +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +; Check that we don't throw away the vreg_width-1 mask if not using shifts +define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a0_masked: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: and r2, r2, #63 +; V7M-NEXT: mov.w r12, #1 +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: subs.w lr, r2, #32 +; V7M-NEXT: lsl.w r2, r12, r2 +; V7M-NEXT: lsr.w r3, r12, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r12, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r2, #1 +; V7M-NEXT: sbc r3, r3, #0 +; V7M-NEXT: ands r0, 
r2 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_a0_masked: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: and r2, r2, #63 +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: rsb r12, r2, #32 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsl r2, lr, r2 +; V7A-NEXT: lsr r12, lr, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: lslpl r12, lr, r3 +; V7A-NEXT: subs r2, r2, #1 +; V7A-NEXT: sbc r3, r12, #0 +; V7A-NEXT: and r0, r2, r0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_a0_masked: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: and r2, r2, #63 +; V7A-T-NEXT: mov.w r12, #1 +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: subs.w lr, r2, #32 +; V7A-T-NEXT: lsl.w r2, r12, r2 +; V7A-T-NEXT: lsr.w r3, r12, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r12, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: subs r2, #1 +; V7A-T-NEXT: sbc r3, r3, #0 +; V7A-T-NEXT: ands r0, r2 +; V7A-T-NEXT: ands r1, r3 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a0_masked: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #63 +; V6M-NEXT: ands r2, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r6, #0 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: sbcs r1, r6 +; V6M-NEXT: ands r1, r5 +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %numlowbits.masked = and i64 %numlowbits, 63 + %onebit = shl i64 1, %numlowbits.masked + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: mov.w r12, #1 +; V7M-NEXT: subs.w lr, r2, 
#32 +; V7M-NEXT: lsl.w r2, r12, r2 +; V7M-NEXT: lsr.w r3, r12, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r12, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r2, #1 +; V7M-NEXT: sbc r3, r3, #0 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_a1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb r12, r2, #32 +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsl r2, lr, r2 +; V7A-NEXT: lsr r12, lr, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: lslpl r12, lr, r3 +; V7A-NEXT: subs r2, r2, #1 +; V7A-NEXT: sbc r3, r12, #0 +; V7A-NEXT: and r0, r2, r0 +; V7A-NEXT: and r1, r3, r1 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_a1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: mov.w r12, #1 +; V7A-T-NEXT: subs.w lr, r2, #32 +; V7A-T-NEXT: lsl.w r2, r12, r2 +; V7A-T-NEXT: lsr.w r3, r12, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r12, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: subs r2, #1 +; V7A-T-NEXT: sbc r3, r3, #0 +; V7A-T-NEXT: ands r0, r2 +; V7A-T-NEXT: ands r1, r3 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r6, #0 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: sbcs r1, r6 +; V6M-NEXT: ands r1, r5 +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a2_load: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r2, #32 +; V7M-NEXT: movs 
r3, #1 +; V7M-NEXT: subs.w r12, r2, #32 +; V7M-NEXT: lsl.w r2, r3, r2 +; V7M-NEXT: lsr.w r1, r3, r1 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r3, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r2, #1 +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: sbc r1, r1, #0 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_a2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r6, r11, lr} +; V7A-NEXT: push {r4, r6, r11, lr} +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: mov r1, #1 +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: rsb r0, r2, #32 +; V7A-NEXT: subs r4, r2, #32 +; V7A-NEXT: lsr r0, r1, r0 +; V7A-NEXT: lslpl r0, r1, r4 +; V7A-NEXT: lsl r1, r1, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: subs r2, r1, #1 +; V7A-NEXT: sbc r0, r0, #0 +; V7A-NEXT: and r1, r0, r3 +; V7A-NEXT: and r0, r2, r6 +; V7A-NEXT: pop {r4, r6, r11, pc} +; +; V7A-T-LABEL: bzhi64_a2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: movs r1, #1 +; V7A-T-NEXT: ldrd r12, lr, [r0] +; V7A-T-NEXT: subs.w r0, r2, #32 +; V7A-T-NEXT: lsr.w r3, r1, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r1, r0 +; V7A-T-NEXT: lsl.w r0, r1, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs r0, #1 +; V7A-T-NEXT: sbc r1, r3, #0 +; V7A-T-NEXT: and.w r0, r0, r12 +; V7A-T-NEXT: and.w r1, r1, lr +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r5, #0 +; V6M-NEXT: mov r1, r5 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r2, r0, #1 +; V6M-NEXT: sbcs r1, r5 +; V6M-NEXT: ldm r4!, {r0, r3} +; V6M-NEXT: ands r1, r3 +; V6M-NEXT: ands r0, r2 +; V6M-NEXT: pop {r4, r5, r7, pc} + %val = load i64, ptr %w + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + 
+define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r2, r1, #32 +; V7M-NEXT: movs r3, #1 +; V7M-NEXT: subs.w r12, r1, #32 +; V7M-NEXT: lsl.w r1, r3, r1 +; V7M-NEXT: lsr.w r2, r3, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r2, r3, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: subs r3, r1, #1 +; V7M-NEXT: sbc r1, r2, #0 +; V7M-NEXT: ldrd r0, r2, [r0] +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_a3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r6, r11, lr} +; V7A-NEXT: push {r4, r6, r11, lr} +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: mov r2, #1 +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: rsb r0, r1, #32 +; V7A-NEXT: subs r4, r1, #32 +; V7A-NEXT: lsl r1, r2, r1 +; V7A-NEXT: lsr r0, r2, r0 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: lslpl r0, r2, r4 +; V7A-NEXT: subs r2, r1, #1 +; V7A-NEXT: sbc r0, r0, #0 +; V7A-NEXT: and r1, r0, r3 +; V7A-NEXT: and r0, r2, r6 +; V7A-NEXT: pop {r4, r6, r11, pc} +; +; V7A-T-LABEL: bzhi64_a3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r1, #32 +; V7A-T-NEXT: movs r2, #1 +; V7A-T-NEXT: ldrd r12, lr, [r0] +; V7A-T-NEXT: subs.w r0, r1, #32 +; V7A-T-NEXT: lsr.w r3, r2, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r2, r0 +; V7A-T-NEXT: lsl.w r0, r2, r1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: subs r0, #1 +; V7A-T-NEXT: sbc r1, r3, #0 +; V7A-T-NEXT: and.w r0, r0, r12 +; V7A-T-NEXT: and.w r1, r1, lr +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r2, r1 +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r5, #0 +; V6M-NEXT: mov r1, r5 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r2, r0, #1 +; V6M-NEXT: sbcs r1, r5 +; V6M-NEXT: ldm 
r4!, {r0, r3} +; V6M-NEXT: ands r1, r3 +; V6M-NEXT: ands r0, r2 +; V6M-NEXT: pop {r4, r5, r7, pc} + %val = load i64, ptr %w + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_a4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: mov.w r12, #1 +; V7M-NEXT: subs.w lr, r2, #32 +; V7M-NEXT: lsl.w r2, r12, r2 +; V7M-NEXT: lsr.w r3, r12, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r12, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: subs r2, #1 +; V7M-NEXT: sbc r3, r3, #0 +; V7M-NEXT: ands r0, r2 +; V7M-NEXT: ands r1, r3 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_a4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb r12, r2, #32 +; V7A-NEXT: mov lr, #1 +; V7A-NEXT: subs r3, r2, #32 +; V7A-NEXT: lsl r2, lr, r2 +; V7A-NEXT: lsr r12, lr, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: lslpl r12, lr, r3 +; V7A-NEXT: subs r2, r2, #1 +; V7A-NEXT: sbc r3, r12, #0 +; V7A-NEXT: and r0, r0, r2 +; V7A-NEXT: and r1, r1, r3 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_a4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #32 +; V7A-T-NEXT: mov.w r12, #1 +; V7A-T-NEXT: subs.w lr, r2, #32 +; V7A-T-NEXT: lsl.w r2, r12, r2 +; V7A-T-NEXT: lsr.w r3, r12, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r3, r12, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: subs r2, #1 +; V7A-T-NEXT: sbc r3, r3, #0 +; V7A-T-NEXT: ands r0, r2 +; V7A-T-NEXT: ands r1, r3 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_a4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r6, lr} +; V6M-NEXT: push {r4, r5, r6, lr} +; V6M-NEXT: mov r5, r1 +; V6M-NEXT: mov r4, r0 +; 
V6M-NEXT: movs r0, #1 +; V6M-NEXT: movs r6, #0 +; V6M-NEXT: mov r1, r6 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: subs r0, r0, #1 +; V6M-NEXT: sbcs r1, r6 +; V6M-NEXT: ands r1, r5 +; V6M-NEXT: ands r0, r4 +; V6M-NEXT: pop {r4, r5, r6, pc} + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern b. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_b0: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_b0: +; V7A: @ %bb.0: +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: bic r0, r0, r2, lsl r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_b0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_b0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: bx lr + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi32_b1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_b1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: bic r0, r0, r2, lsl r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_b1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_b1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, 
r1 +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: bx lr + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_b2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_b2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: bic r0, r0, r2, lsl r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_b2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_b2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi32_b3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_b3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: bic r0, r0, r2, lsl r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_b3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_b3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %conv 
= zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_b4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: lsl.w r1, r2, r1 +; V7M-NEXT: bics r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_b4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: bic r0, r0, r2, lsl r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_b4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: lsl.w r1, r2, r1 +; V7A-T-NEXT: bics r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_b4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: mvns r2, r2 +; V6M-NEXT: lsls r2, r1 +; V6M-NEXT: bics r0, r2 +; V6M-NEXT: bx lr + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_b0: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r12, r3, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl.w r12, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r3, r2 +; V7M-NEXT: bic.w r0, r0, r12 +; V7M-NEXT: bics r1, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_b0: +; V7A: @ %bb.0: +; V7A-NEXT: subs r12, r2, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lslpl r3, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_b0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r12, r3, r2 +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl.w r12, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r3, r2 +; V7A-T-NEXT: bic.w r0, r0, r12 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_b0: +; V6M: @ %bb.0: +; 
V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r5, r0 +; V6M-NEXT: bics r4, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi64_b1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r12, r3, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl.w r12, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r3, r2 +; V7M-NEXT: bic.w r0, r0, r12 +; V7M-NEXT: bics r1, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_b1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: subs r12, r2, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lslpl r3, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_b1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r12, r3, r2 +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl.w r12, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r3, r2 +; V7A-T-NEXT: bic.w r0, r0, r12 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_b1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r5, r0 +; V6M-NEXT: bics r4, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + 
ret i64 %masked +} + +define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_b2_load: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r1, #-1 +; V7M-NEXT: subs.w r12, r2, #32 +; V7M-NEXT: lsl.w r3, r1, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: ldrd r0, r2, [r0] +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r1, r12 +; V7M-NEXT: bics r0, r3 +; V7M-NEXT: bic.w r1, r2, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_b2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, lr} +; V7A-NEXT: push {r4, lr} +; V7A-NEXT: ldr r4, [r0] +; V7A-NEXT: mvn r1, #0 +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: subs r0, r2, #32 +; V7A-NEXT: lsl r2, r1, r2 +; V7A-NEXT: lslpl r1, r1, r0 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r1, r3, r1 +; V7A-NEXT: bic r0, r4, r2 +; V7A-NEXT: pop {r4, pc} +; +; V7A-T-LABEL: bzhi64_b2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r1, #-1 +; V7A-T-NEXT: ldrd r0, r12, [r0] +; V7A-T-NEXT: lsl.w r3, r1, r2 +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r1, r2 +; V7A-T-NEXT: bics r0, r3 +; V7A-T-NEXT: bic.w r1, r12, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_b2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: ldm r4!, {r2, r3} +; V6M-NEXT: bics r2, r0 +; V6M-NEXT: bics r3, r1 +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: mov r1, r3 +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { +; V7M-LABEL: bzhi64_b3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r2, #-1 +; V7M-NEXT: subs.w r12, r1, #32 +; V7M-NEXT: lsl.w r3, r2, r1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r3, #0 +; V7M-NEXT: ldrd r0, r1, 
[r0] +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r2, r2, r12 +; V7M-NEXT: bics r1, r2 +; V7M-NEXT: bics r0, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_b3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r6, r11, lr} +; V7A-NEXT: push {r4, r6, r11, lr} +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: subs r0, r1, #32 +; V7A-NEXT: lsl r4, r2, r1 +; V7A-NEXT: lslpl r2, r2, r0 +; V7A-NEXT: movwpl r4, #0 +; V7A-NEXT: bic r1, r3, r2 +; V7A-NEXT: bic r0, r6, r4 +; V7A-NEXT: pop {r4, r6, r11, pc} +; +; V7A-T-LABEL: bzhi64_b3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: ldrd r0, r12, [r0] +; V7A-T-NEXT: lsl.w r3, r2, r1 +; V7A-T-NEXT: subs r1, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r3, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r2, r1 +; V7A-T-NEXT: bics r0, r3 +; V7A-T-NEXT: bic.w r1, r12, r2 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_b3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: mov r2, r1 +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: ldm r4!, {r2, r3} +; V6M-NEXT: bics r2, r0 +; V6M-NEXT: bics r3, r1 +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: mov r1, r3 +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_b4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsl.w r12, r3, r2 +; V7M-NEXT: subs r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: movpl.w r12, #0 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl r3, r2 +; V7M-NEXT: bic.w r0, r0, r12 +; V7M-NEXT: bics r1, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_b4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: subs r12, r2, #32 +; V7A-NEXT: mvn r3, 
#0 +; V7A-NEXT: lsl r2, r3, r2 +; V7A-NEXT: lslpl r3, r3, r12 +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: bic r1, r1, r3 +; V7A-NEXT: bic r0, r0, r2 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_b4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: lsl.w r12, r3, r2 +; V7A-T-NEXT: subs r2, #32 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl.w r12, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl r3, r2 +; V7A-T-NEXT: bic.w r0, r0, r12 +; V7A-T-NEXT: bics r1, r3 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_b4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: bics r5, r0 +; V6M-NEXT: bics r4, r1 +; V6M-NEXT: mov r0, r5 +; V6M-NEXT: mov r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern c. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_c0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_c0: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_c0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_c0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_c1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_c1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_c1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_c1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_c2_load: +; V7M: 
@ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_c2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_c2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_c2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_c3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_c3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_c3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_c3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret 
i32 %masked +} + +define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_c4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_c4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_c4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_c4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_c0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsbs.w lr, r2, #32 +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: mov.w r12, #-1 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsr.w r2, r12, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r3, r3, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_c0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsbs lr, r2, #32 +; V7A-NEXT: rsb r2, r2, #64 +; V7A-NEXT: mvn r12, #0 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r2, r12, r2 +; V7A-NEXT: lsrpl r3, r3, lr +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: and r1, r2, r1 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_c0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsbs.w lr, r2, #32 +; V7A-T-NEXT: rsb.w r2, r2, #64 +; V7A-T-NEXT: mov.w r12, #-1 +; V7A-T-NEXT: mov.w 
r3, #-1 +; V7A-T-NEXT: lsr.w r2, r12, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r3, r3, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: ands r0, r3 +; V7A-T-NEXT: ands r1, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_c0: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r2, r0, r2 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_c1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: uxtb r2, r2 +; V7M-NEXT: subs.w r12, r2, #32 +; V7M-NEXT: lsr.w r2, r3, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r3, r3, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_c1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb lr, r2, #64 +; V7A-NEXT: mvn r2, #31 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: uxtb r12, lr +; V7A-NEXT: uxtab r2, r2, lr +; V7A-NEXT: lsr r12, r3, r12 +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: movwpl r12, #0 +; V7A-NEXT: lsrpl r3, r3, r2 +; V7A-NEXT: and r1, r12, r1 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_c1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w lr, r2, #64 +; V7A-T-NEXT: mvn r2, #31 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: uxtb.w r12, lr +; V7A-T-NEXT: uxtab r2, r2, lr +; V7A-T-NEXT: lsr.w r12, r3, r12 +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: it pl +; 
V7A-T-NEXT: movpl.w r12, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r3, r2 +; V7A-T-NEXT: and.w r1, r1, r12 +; V7A-T-NEXT: ands r0, r3 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_c1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r0, r0, r2 +; V6M-NEXT: uxtb r2, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_c2_load: +; V7M: @ %bb.0: +; V7M-NEXT: rsbs.w r1, r2, #32 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl r3, r1 +; V7M-NEXT: ldrd r0, r1, [r0] +; V7M-NEXT: mov.w r12, #-1 +; V7M-NEXT: lsr.w r2, r12, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_c2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r5, lr} +; V7A-NEXT: push {r5, lr} +; V7A-NEXT: rsbs r1, r2, #32 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: mvn r12, #0 +; V7A-NEXT: ldm r0, {r0, r5} +; V7A-NEXT: lsrpl r3, r3, r1 +; V7A-NEXT: rsb r1, r2, #64 +; V7A-NEXT: and r0, r3, r0 +; V7A-NEXT: lsr r1, r12, r1 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: and r1, r1, r5 +; V7A-NEXT: pop {r5, pc} +; +; V7A-T-LABEL: bzhi64_c2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsbs.w r1, r2, #32 +; V7A-T-NEXT: mov.w r3, #-1 +; V7A-T-NEXT: ldrd r0, lr, [r0] +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r3, r1 +; V7A-T-NEXT: rsb.w r1, r2, #64 +; V7A-T-NEXT: mov.w r12, #-1 +; V7A-T-NEXT: and.w r0, r0, r3 +; 
V7A-T-NEXT: lsr.w r1, r12, r1 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: and.w r1, r1, lr +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_c2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r2, r0, r2 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldm r4!, {r2, r3} +; V6M-NEXT: ands r0, r2 +; V6M-NEXT: ands r1, r3 +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_c3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #64 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: subs.w r2, r1, #32 +; V7M-NEXT: lsr.w r1, r3, r1 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl r3, r2 +; V7M-NEXT: ldrd r0, r2, [r0] +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_c3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r4, r6, r11, lr} +; V7A-NEXT: push {r4, r6, r11, lr} +; V7A-NEXT: rsb r1, r1, #64 +; V7A-NEXT: mvn r4, #31 +; V7A-NEXT: mvn r2, #0 +; V7A-NEXT: ldr r6, [r0] +; V7A-NEXT: ldr r3, [r0, #4] +; V7A-NEXT: uxtb r0, r1 +; V7A-NEXT: uxtab r4, r4, r1 +; V7A-NEXT: lsr r0, r2, r0 +; V7A-NEXT: cmp r4, #0 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: and r1, r0, r3 +; V7A-NEXT: lsrpl r2, r2, r4 +; V7A-NEXT: and r0, r2, r6 +; V7A-NEXT: pop {r4, r6, r11, pc} +; +; V7A-T-LABEL: bzhi64_c3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r1, r1, #64 +; V7A-T-NEXT: mvn r3, #31 +; V7A-T-NEXT: ldrd r12, lr, [r0] +; V7A-T-NEXT: mov.w r2, #-1 +; V7A-T-NEXT: uxtb r0, r1 +; V7A-T-NEXT: uxtab r3, r3, r1 +; V7A-T-NEXT: 
lsr.w r0, r2, r0 +; V7A-T-NEXT: cmp r3, #0 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: and.w r1, r0, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl r2, r3 +; V7A-T-NEXT: and.w r0, r2, r12 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_c3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: mov r4, r0 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r0, r0, r1 +; V6M-NEXT: uxtb r2, r0 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ldm r4!, {r2, r3} +; V6M-NEXT: ands r0, r2 +; V6M-NEXT: ands r1, r3 +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_c4_commutative: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsbs.w lr, r2, #32 +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: mov.w r12, #-1 +; V7M-NEXT: mov.w r3, #-1 +; V7M-NEXT: lsr.w r2, r12, r2 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r3, r3, lr +; V7M-NEXT: it pl +; V7M-NEXT: movpl r2, #0 +; V7M-NEXT: ands r0, r3 +; V7M-NEXT: ands r1, r2 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_c4_commutative: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsbs lr, r2, #32 +; V7A-NEXT: rsb r2, r2, #64 +; V7A-NEXT: mvn r12, #0 +; V7A-NEXT: mvn r3, #0 +; V7A-NEXT: lsr r2, r12, r2 +; V7A-NEXT: lsrpl r3, r3, lr +; V7A-NEXT: movwpl r2, #0 +; V7A-NEXT: and r0, r0, r3 +; V7A-NEXT: and r1, r1, r2 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_c4_commutative: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsbs.w lr, r2, #32 +; V7A-T-NEXT: rsb.w r2, r2, #64 +; V7A-T-NEXT: mov.w r12, #-1 +; V7A-T-NEXT: mov.w r3, #-1 +; 
V7A-T-NEXT: lsr.w r2, r12, r2 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r3, r3, lr +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r2, #0 +; V7A-T-NEXT: ands r0, r3 +; V7A-T-NEXT: ands r1, r2 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_c4_commutative: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, r5, r7, lr} +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: mov r4, r1 +; V6M-NEXT: mov r5, r0 +; V6M-NEXT: movs r0, #64 +; V6M-NEXT: subs r2, r0, r2 +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: mvns r0, r0 +; V6M-NEXT: mov r1, r0 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: ands r0, r5 +; V6M-NEXT: ands r1, r4 +; V6M-NEXT: pop {r4, r5, r7, pc} + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern d. 32-bit. +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_d0: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_d0: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_d0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_d0: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_d1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: uxtb r1, r1 +; 
V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_d1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_d1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_d1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_d2_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: rsb.w r1, r1, #32 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_d2_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_d2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_d2_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi32_d3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, 
#32 +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: lsls r0, r1 +; V7M-NEXT: lsrs r0, r1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_d3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: rsb r1, r1, #32 +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: uxtb r1, r1 +; V7A-NEXT: lsl r0, r0, r1 +; V7A-NEXT: lsr r0, r0, r1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_d3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: rsb.w r1, r1, #32 +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: uxtb r1, r1 +; V7A-T-NEXT: lsls r0, r1 +; V7A-T-NEXT: lsrs r0, r1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_d3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #32 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: uxtb r1, r1 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: lsls r0, r1 +; V6M-NEXT: lsrs r0, r1 +; V6M-NEXT: bx lr + %val = load i32, ptr %w + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +; 64-bit. 
+ +define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_d0: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r3, r2, #64 +; V7M-NEXT: rsbs.w r2, r2, #32 +; V7M-NEXT: rsb.w lr, r3, #32 +; V7M-NEXT: lsl.w r12, r1, r3 +; V7M-NEXT: lsr.w r1, r0, lr +; V7M-NEXT: orr.w r1, r1, r12 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, r2 +; V7M-NEXT: lsl.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r12, r1, lr +; V7M-NEXT: lsr.w r0, r0, r3 +; V7M-NEXT: orr.w r0, r0, r12 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r2 +; V7M-NEXT: lsr.w r1, r1, r3 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_d0: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb lr, r2, #64 +; V7A-NEXT: rsbs r2, r2, #32 +; V7A-NEXT: rsb r12, lr, #32 +; V7A-NEXT: lsr r3, r0, r12 +; V7A-NEXT: orr r1, r3, r1, lsl lr +; V7A-NEXT: lslpl r1, r0, r2 +; V7A-NEXT: lsl r0, r0, lr +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, lr +; V7A-NEXT: orr r0, r0, r1, lsl r12 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: lsr r1, r1, lr +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_d0: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #64 +; V7A-T-NEXT: rsbs.w r2, r2, #32 +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: lsl.w r12, r1, r3 +; V7A-T-NEXT: lsr.w r1, r0, lr +; V7A-T-NEXT: orr.w r1, r1, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r1, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r12, r1, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: lsr.w r1, r1, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_d0: +; V6M: @ %bb.0: +; V6M-NEXT: 
.save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: movs r3, #64 +; V6M-NEXT: subs r4, r3, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_d1_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r2, r2, #64 +; V7M-NEXT: uxtb r2, r2 +; V7M-NEXT: rsb.w r3, r2, #32 +; V7M-NEXT: lsl.w r12, r1, r2 +; V7M-NEXT: lsr.w r1, r0, r3 +; V7M-NEXT: orr.w r1, r1, r12 +; V7M-NEXT: subs.w r12, r2, #32 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r1, r0, r12 +; V7M-NEXT: lsl.w r0, r0, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r3, r1, r3 +; V7M-NEXT: lsr.w r0, r0, r2 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r1, r12 +; V7M-NEXT: lsr.w r1, r1, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_d1_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r11, lr} +; V7A-NEXT: push {r11, lr} +; V7A-NEXT: rsb lr, r2, #64 +; V7A-NEXT: uxtb r3, lr +; V7A-NEXT: rsb r12, r3, #32 +; V7A-NEXT: lsr r2, r0, r12 +; V7A-NEXT: orr r1, r2, r1, lsl r3 +; V7A-NEXT: mvn r2, #31 +; V7A-NEXT: uxtab r2, r2, lr +; V7A-NEXT: cmp r2, #0 +; V7A-NEXT: lslpl r1, r0, r2 +; V7A-NEXT: lsl r0, r0, r3 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r3 +; V7A-NEXT: orr r0, r0, r1, lsl r12 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: lsr r1, r1, r3 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r11, pc} +; +; V7A-T-LABEL: bzhi64_d1_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r4, r2, #64 +; V7A-T-NEXT: mvn r2, #31 +; V7A-T-NEXT: uxtb r3, r4 +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: lsl.w r12, r1, r3 +; V7A-T-NEXT: uxtab r2, r2, r4 +; 
V7A-T-NEXT: lsr.w r1, r0, lr +; V7A-T-NEXT: cmp r2, #0 +; V7A-T-NEXT: orr.w r1, r1, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r1, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r4, r1, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: lsr.w r1, r1, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bzhi64_d1_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: movs r3, #64 +; V6M-NEXT: subs r2, r3, r2 +; V6M-NEXT: uxtb r4, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_d2_load: +; V7M: @ %bb.0: +; V7M-NEXT: .save {r7, lr} +; V7M-NEXT: push {r7, lr} +; V7M-NEXT: rsb.w r1, r2, #64 +; V7M-NEXT: ldrd r0, r3, [r0] +; V7M-NEXT: rsb.w lr, r1, #32 +; V7M-NEXT: rsbs.w r2, r2, #32 +; V7M-NEXT: lsl.w r12, r3, r1 +; V7M-NEXT: lsr.w r3, r0, lr +; V7M-NEXT: orr.w r3, r3, r12 +; V7M-NEXT: it pl +; V7M-NEXT: lslpl.w r3, r0, r2 +; V7M-NEXT: lsl.w r0, r0, r1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r12, r3, lr +; V7M-NEXT: lsr.w r0, r0, r1 +; V7M-NEXT: lsr.w r1, r3, r1 +; V7M-NEXT: orr.w r0, r0, r12 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r3, r2 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: pop {r7, pc} +; +; V7A-LABEL: bzhi64_d2_load: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r5, lr} +; V7A-NEXT: push {r5, lr} +; V7A-NEXT: rsb r3, r2, #64 +; V7A-NEXT: ldm r0, {r0, r5} +; V7A-NEXT: rsb r12, r3, #32 +; V7A-NEXT: rsbs r2, r2, #32 +; V7A-NEXT: lsr r1, 
r0, r12 +; V7A-NEXT: orr r1, r1, r5, lsl r3 +; V7A-NEXT: lslpl r1, r0, r2 +; V7A-NEXT: lsl r0, r0, r3 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r3 +; V7A-NEXT: orr r0, r0, r1, lsl r12 +; V7A-NEXT: lsrpl r0, r1, r2 +; V7A-NEXT: lsr r1, r1, r3 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r5, pc} +; +; V7A-T-LABEL: bzhi64_d2_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r7, lr} +; V7A-T-NEXT: push {r7, lr} +; V7A-T-NEXT: rsb.w r3, r2, #64 +; V7A-T-NEXT: ldrd r0, r1, [r0] +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: rsbs.w r2, r2, #32 +; V7A-T-NEXT: lsl.w r12, r1, r3 +; V7A-T-NEXT: lsr.w r1, r0, lr +; V7A-T-NEXT: orr.w r1, r1, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r1, r0, r2 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r12, r1, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r1, r2 +; V7A-T-NEXT: lsr.w r1, r1, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r7, pc} +; +; V6M-LABEL: bzhi64_d2_load: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: movs r1, #64 +; V6M-NEXT: subs r4, r1, r2 +; V6M-NEXT: ldr r2, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { +; V7M-LABEL: bzhi64_d3_load_indexzext: +; V7M: @ %bb.0: +; V7M-NEXT: rsb.w r1, r1, #64 +; V7M-NEXT: ldrd r0, r2, [r0] +; V7M-NEXT: uxtb r1, r1 +; V7M-NEXT: rsb.w r3, r1, #32 +; V7M-NEXT: lsl.w r12, r2, r1 +; V7M-NEXT: lsr.w r2, r0, r3 +; V7M-NEXT: orr.w r2, r2, r12 +; V7M-NEXT: subs.w r12, r1, #32 +; V7M-NEXT: it 
pl +; V7M-NEXT: lslpl.w r2, r0, r12 +; V7M-NEXT: lsl.w r0, r0, r1 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r0, #0 +; V7M-NEXT: lsl.w r3, r2, r3 +; V7M-NEXT: lsr.w r0, r0, r1 +; V7M-NEXT: lsr.w r1, r2, r1 +; V7M-NEXT: orr.w r0, r0, r3 +; V7M-NEXT: it pl +; V7M-NEXT: lsrpl.w r0, r2, r12 +; V7M-NEXT: it pl +; V7M-NEXT: movpl r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_d3_load_indexzext: +; V7A: @ %bb.0: +; V7A-NEXT: .save {r5, lr} +; V7A-NEXT: push {r5, lr} +; V7A-NEXT: rsb r1, r1, #64 +; V7A-NEXT: ldm r0, {r0, r5} +; V7A-NEXT: uxtb r2, r1 +; V7A-NEXT: rsb r12, r2, #32 +; V7A-NEXT: lsr r3, r0, r12 +; V7A-NEXT: orr r3, r3, r5, lsl r2 +; V7A-NEXT: mvn r5, #31 +; V7A-NEXT: uxtab r1, r5, r1 +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: lslpl r3, r0, r1 +; V7A-NEXT: lsl r0, r0, r2 +; V7A-NEXT: movwpl r0, #0 +; V7A-NEXT: lsr r0, r0, r2 +; V7A-NEXT: orr r0, r0, r3, lsl r12 +; V7A-NEXT: lsrpl r0, r3, r1 +; V7A-NEXT: lsr r1, r3, r2 +; V7A-NEXT: movwpl r1, #0 +; V7A-NEXT: pop {r5, pc} +; +; V7A-T-LABEL: bzhi64_d3_load_indexzext: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: .save {r4, lr} +; V7A-T-NEXT: push {r4, lr} +; V7A-T-NEXT: rsb.w r4, r1, #64 +; V7A-T-NEXT: ldrd r0, r2, [r0] +; V7A-T-NEXT: mvn r1, #31 +; V7A-T-NEXT: uxtb r3, r4 +; V7A-T-NEXT: rsb.w lr, r3, #32 +; V7A-T-NEXT: lsl.w r12, r2, r3 +; V7A-T-NEXT: uxtab r1, r1, r4 +; V7A-T-NEXT: lsr.w r2, r0, lr +; V7A-T-NEXT: cmp r1, #0 +; V7A-T-NEXT: orr.w r2, r2, r12 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lslpl.w r2, r0, r1 +; V7A-T-NEXT: lsl.w r0, r0, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r0, #0 +; V7A-T-NEXT: lsl.w r4, r2, lr +; V7A-T-NEXT: lsr.w r0, r0, r3 +; V7A-T-NEXT: orr.w r0, r0, r4 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: lsrpl.w r0, r2, r1 +; V7A-T-NEXT: lsr.w r1, r2, r3 +; V7A-T-NEXT: it pl +; V7A-T-NEXT: movpl r1, #0 +; V7A-T-NEXT: pop {r4, pc} +; +; V6M-LABEL: bzhi64_d3_load_indexzext: +; V6M: @ %bb.0: +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: movs r2, #64 +; V6M-NEXT: subs r1, r2, r1 +; V6M-NEXT: uxtb 
r4, r1 +; V6M-NEXT: ldr r2, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsl +; V6M-NEXT: mov r2, r4 +; V6M-NEXT: bl __aeabi_llsr +; V6M-NEXT: pop {r4, pc} + %val = load i64, ptr %w + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Constant mask +; ---------------------------------------------------------------------------- ; + +; 32-bit + +define i32 @bzhi32_constant_mask32(i32 %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask32: +; V7M: @ %bb.0: +; V7M-NEXT: bic r0, r0, #-2147483648 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask32: +; V7A: @ %bb.0: +; V7A-NEXT: bic r0, r0, #-2147483648 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: bic r0, r0, #-2147483648 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask32: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r1, #31 +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: bx lr + %masked = and i32 %val, 2147483647 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask32_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: bic r0, r0, #-2147483648 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask32_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: bic r0, r0, #-2147483648 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask32_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: bic r0, r0, #-2147483648 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask32_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r1, #31 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: bx lr + %val1 = load i32, ptr %val + %masked = and 
i32 %val1, 2147483647 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask16(i32 %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask16: +; V7M: @ %bb.0: +; V7M-NEXT: bfc r0, #15, #17 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask16: +; V7A: @ %bb.0: +; V7A-NEXT: bfc r0, #15, #17 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask16: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: bfc r0, #15, #17 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask16: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, .LCPI41_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI41_0: +; V6M-NEXT: .long 32767 @ 0x7fff + %masked = and i32 %val, 32767 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask16_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: bfc r0, #15, #17 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask16_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: bfc r0, #15, #17 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask16_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: bfc r0, #15, #17 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask16_load: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, [r0] +; V6M-NEXT: ldr r0, .LCPI42_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI42_0: +; V6M-NEXT: .long 32767 @ 0x7fff + %val1 = load i32, ptr %val + %masked = and i32 %val1, 32767 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask8(i32 %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask8: +; V7M: @ %bb.0: +; V7M-NEXT: and r0, r0, #127 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask8: +; V7A: @ %bb.0: +; V7A-NEXT: and r0, r0, #127 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask8: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: and r0, r0, #127 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask8: +; V6M: @ %bb.0: +; 
V6M-NEXT: movs r1, #127 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %masked = and i32 %val, 127 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind { +; V7M-LABEL: bzhi32_constant_mask8_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: and r0, r0, #127 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi32_constant_mask8_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: and r0, r0, #127 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi32_constant_mask8_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: and r0, r0, #127 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi32_constant_mask8_load: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, [r0] +; V6M-NEXT: movs r0, #127 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: bx lr + %val1 = load i32, ptr %val + %masked = and i32 %val1, 127 + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_constant_mask64(i64 %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask64: +; V7M: @ %bb.0: +; V7M-NEXT: bic r1, r1, #-1073741824 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask64: +; V7A: @ %bb.0: +; V7A-NEXT: bic r1, r1, #-1073741824 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask64: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: bic r1, r1, #-1073741824 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask64: +; V6M: @ %bb.0: +; V6M-NEXT: movs r2, #3 +; V6M-NEXT: lsls r2, r2, #30 +; V6M-NEXT: bics r1, r2 +; V6M-NEXT: bx lr + %masked = and i64 %val, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask64_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldrd r0, r1, [r0] +; V7M-NEXT: bic r1, r1, #-1073741824 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask64_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldrd r0, r1, [r0] +; V7A-NEXT: bic r1, r1, #-1073741824 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask64_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldrd r0, r1, [r0] +; V7A-T-NEXT: bic r1, r1, #-1073741824 +; 
V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask64_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #3 +; V6M-NEXT: lsls r3, r1, #30 +; V6M-NEXT: ldr r2, [r0] +; V6M-NEXT: ldr r1, [r0, #4] +; V6M-NEXT: bics r1, r3 +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: bx lr + %val1 = load i64, ptr %val + %masked = and i64 %val1, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask32(i64 %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask32: +; V7M: @ %bb.0: +; V7M-NEXT: bic r0, r0, #-2147483648 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask32: +; V7A: @ %bb.0: +; V7A-NEXT: bic r0, r0, #-2147483648 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask32: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: bic r0, r0, #-2147483648 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask32: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r1, #31 +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %masked = and i64 %val, 2147483647 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask32_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bic r0, r0, #-2147483648 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask32_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bic r0, r0, #-2147483648 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask32_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bic r0, r0, #-2147483648 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask32_load: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #1 +; V6M-NEXT: lsls r1, r1, #31 +; V6M-NEXT: ldr r0, [r0] +; V6M-NEXT: bics r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %val1 = load i64, ptr %val + %masked = and i64 %val1, 2147483647 + ret i64 %masked +} + +define i64 
@bzhi64_constant_mask16(i64 %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask16: +; V7M: @ %bb.0: +; V7M-NEXT: bfc r0, #15, #17 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask16: +; V7A: @ %bb.0: +; V7A-NEXT: bfc r0, #15, #17 +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask16: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: bfc r0, #15, #17 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask16: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, .LCPI49_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI49_0: +; V6M-NEXT: .long 32767 @ 0x7fff + %masked = and i64 %val, 32767 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask16_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bfc r0, #15, #17 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask16_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: bfc r0, #15, #17 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask16_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bfc r0, #15, #17 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask16_load: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, [r0] +; V6M-NEXT: ldr r0, .LCPI50_0 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.1: +; V6M-NEXT: .LCPI50_0: +; V6M-NEXT: .long 32767 @ 0x7fff + %val1 = load i64, ptr %val + %masked = and i64 %val1, 32767 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask8(i64 %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask8: +; V7M: @ %bb.0: +; V7M-NEXT: and r0, r0, #127 +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask8: +; V7A: @ %bb.0: +; V7A-NEXT: and r0, r0, #127 +; V7A-NEXT: mov r1, #0 +; 
V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask8: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: and r0, r0, #127 +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask8: +; V6M: @ %bb.0: +; V6M-NEXT: movs r1, #127 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %masked = and i64 %val, 127 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind { +; V7M-LABEL: bzhi64_constant_mask8_load: +; V7M: @ %bb.0: +; V7M-NEXT: ldr r0, [r0] +; V7M-NEXT: movs r1, #0 +; V7M-NEXT: and r0, r0, #127 +; V7M-NEXT: bx lr +; +; V7A-LABEL: bzhi64_constant_mask8_load: +; V7A: @ %bb.0: +; V7A-NEXT: ldr r0, [r0] +; V7A-NEXT: mov r1, #0 +; V7A-NEXT: and r0, r0, #127 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: bzhi64_constant_mask8_load: +; V7A-T: @ %bb.0: +; V7A-T-NEXT: ldr r0, [r0] +; V7A-T-NEXT: movs r1, #0 +; V7A-T-NEXT: and r0, r0, #127 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: bzhi64_constant_mask8_load: +; V6M: @ %bb.0: +; V6M-NEXT: ldr r1, [r0] +; V6M-NEXT: movs r0, #127 +; V6M-NEXT: ands r0, r1 +; V6M-NEXT: movs r1, #0 +; V6M-NEXT: bx lr + %val1 = load i64, ptr %val + %masked = and i64 %val1, 127 + ret i64 %masked +} diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir index 8e671c903add..f5b2e98b62fd 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir @@ -81,7 +81,7 @@ body: | STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8) dead renamable $r0 = SPACE 8920, undef renamable $r0 renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S) - renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg + renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) renamable $r0 = VMOVRH killed 
renamable $s0, 14, $noreg dead renamable $r1 = SPACE 1350, undef renamable $r0 diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir index 03ddd80ed0ea..4b6647683139 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir @@ -72,7 +72,7 @@ body: | renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S) renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool) dead renamable $r0 = SPACE 1230, undef renamable $r0 - renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg + renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg dead renamable $r1 = SPACE 1330, undef renamable $r0 diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir index 46f028bd492d..c16a62a8a989 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir @@ -89,7 +89,7 @@ body: | $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 4 renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool) - VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv + VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res) FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv Bcc %bb.2, 0, killed $cpsr diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir index 5a03fcdb7fdf..049b7d9b4613 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir @@ -95,7 +95,7 @@ body: | $sp = frame-setup SUBri $sp, 
4, 14, $noreg, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 4 renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool) - VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv + VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res) FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv Bcc %bb.2, 0, killed $cpsr diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 800ee87b95ca..8230e47259dd 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -1572,26 +1572,11 @@ define void @test_fma(ptr %p, ptr %q, ptr %r) #0 { } define void @test_fabs(ptr %p) { -; CHECK-FP16-LABEL: test_fabs: -; CHECK-FP16: ldrh r1, [r0] -; CHECK-FP16-NEXT: vmov s0, r1 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-FP16-NEXT: vabs.f32 s0, s0 -; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-FP16-NEXT: vmov r1, s0 -; CHECK-FP16-NEXT: strh r1, [r0] -; CHECK-FP16-NEXT: bx lr -; -; CHECK-LIBCALL-LABEL: test_fabs: -; CHECK-LIBCALL: .save {r4, lr} -; CHECK-LIBCALL-NEXT: push {r4, lr} -; CHECK-LIBCALL-NEXT: mov r4, r0 -; CHECK-LIBCALL-NEXT: ldrh r0, [r0] -; CHECK-LIBCALL-NEXT: bl __aeabi_h2f -; CHECK-LIBCALL-NEXT: bic r0, r0, #-2147483648 -; CHECK-LIBCALL-NEXT: bl __aeabi_f2h -; CHECK-LIBCALL-NEXT: strh r0, [r4] -; CHECK-LIBCALL-NEXT: pop {r4, pc} +; CHECK-ALL-LABEL: test_fabs: +; CHECK-ALL: ldrh r1, [r0] +; CHECK-ALL-NEXT: bfc r1, #15, #17 +; CHECK-ALL-NEXT: strh r1, [r0] +; CHECK-ALL-NEXT: bx lr %a = load half, ptr %p, align 2 %r = call half @llvm.fabs.f16(half %a) store half %r, ptr %p @@ -2454,26 +2439,11 @@ define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { } define void @test_fneg(ptr %p1, ptr %p2) #0 { -; CHECK-FP16-LABEL: test_fneg: -; CHECK-FP16: ldrh r0, [r0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-FP16-NEXT: 
vneg.f32 s0, s0 -; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-FP16-NEXT: vmov r0, s0 -; CHECK-FP16-NEXT: strh r0, [r1] -; CHECK-FP16-NEXT: bx lr -; -; CHECK-LIBCALL-LABEL: test_fneg: -; CHECK-LIBCALL: .save {r4, lr} -; CHECK-LIBCALL-NEXT: push {r4, lr} -; CHECK-LIBCALL-NEXT: ldrh r0, [r0] -; CHECK-LIBCALL-NEXT: mov r4, r1 -; CHECK-LIBCALL-NEXT: bl __aeabi_h2f -; CHECK-LIBCALL-NEXT: eor r0, r0, #-2147483648 -; CHECK-LIBCALL-NEXT: bl __aeabi_f2h -; CHECK-LIBCALL-NEXT: strh r0, [r4] -; CHECK-LIBCALL-NEXT: pop {r4, pc} +; CHECK-ALL-LABEL: test_fneg: +; CHECK-ALL: ldrh r0, [r0] +; CHECK-ALL-NEXT: eor r0, r0, #32768 +; CHECK-ALL-NEXT: strh r0, [r1] +; CHECK-ALL-NEXT: bx lr %v = load half, ptr %p1, align 2 %res = fneg half %v store half %res, ptr %p2, align 2 diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll index 165eb4b8af43..47e1f84ff664 100644 --- a/llvm/test/CodeGen/ARM/fp16_fast_math.ll +++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll @@ -16,11 +16,11 @@ define half @normal_fadd(half %x, half %y) { ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: 
[[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -33,7 +33,7 @@ define half @normal_fadd(half %x, half %y) { ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -50,11 +50,11 @@ define half @fast_fadd(half %x, half %y) { ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: 
[[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -67,7 +67,7 @@ define half @fast_fadd(half %x, half %y) { ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -84,11 +84,11 @@ define half @ninf_fadd(half %x, half %y) { ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; 
CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -101,7 +101,7 @@ define half @ninf_fadd(half %x, half %y) { ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -122,19 +122,19 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) { ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]] ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY4]], 14 /* 
CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]] - ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -148,9 +148,9 @@ define half 
@normal_fadd_sequence(half %x, half %y, half %z) { ; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]] ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -169,14 +169,14 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) { ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]] ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: 
[[COPY5:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY6]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -190,9 +190,9 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) { ; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]] 
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -211,19 +211,19 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) { ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0 ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]] ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]] - ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]] - ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]] ; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]] - ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg - ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg + ; 
CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr + ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF - ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr ; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]] ; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]] ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 @@ -237,9 +237,9 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) { ; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0 ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg - ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]] ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 entry: diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b228d2a..a6f0a03fc7e5 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -383,8 +383,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; 
SOFT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; SOFT-LABEL: utest_f16i32: ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} @@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) { ; SOFT-NEXT: .LBB7_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP2-LABEL: utesth_f16i32: +; VFP2-LABEL: utest_f16i32: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} @@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; -; FULL-LABEL: utesth_f16i32: +; FULL-LABEL: utest_f16i32: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.u32.f16 s0, s0 ; FULL-NEXT: vmov r0, s0 @@ -3985,6 +3985,46 @@ entry: ret i32 %spec.store.select7 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; SOFT-LABEL: ustest_f16i32_nsat: +; SOFT: @ %bb.0: +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: uxth r0, r0 +; SOFT-NEXT: bl __aeabi_h2f +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: pop {r7, pc} +; +; VFP2-LABEL: ustest_f16i32_nsat: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: bl __aeabi_h2f +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: usat r0, #0, r0 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: ustest_f16i32_nsat: +; FULL: @ %bb.0: +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: usat r0, #0, r0 +; FULL-NEXT: bx lr + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 
96f009a4da02..ba31b353ee1f 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -748,8 +748,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d12, d13} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -1366,8 +1366,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -2109,8 +2109,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> 
%x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64: +; CHECK-FP16-LABEL: utest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -2835,8 +2835,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -3344,8 +3344,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -4044,8 +4044,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; 
CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64_mm: +; CHECK-FP16-LABEL: utest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -4215,6 +4215,77 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NEON-LABEL: ustest_f16i32_nsat: +; CHECK-NEON: @ %bb.0: @ %entry +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s3 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s20, s1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s16, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vmov r1, s20 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov s18, r4 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d11[0], r0 +; CHECK-NEON-NEXT: mov r0, r1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18 +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEON-NEXT: vmov r0, s2 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: vmov r0, s4 +; CHECK-NEON-NEXT: vmov.32 d11[1], r0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d10[1], r0 +; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8 +; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8 +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEON-NEXT: pop {r4, pc} +; 
+; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vmovx.f16 s2, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1 +; CHECK-FP16-NEXT: vmovx.f16 s4, s1 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-FP16-NEXT: vmov.i32 q9, #0x0 +; CHECK-FP16-NEXT: vmov.32 d17[0], r0 +; CHECK-FP16-NEXT: vmov r0, s6 +; CHECK-FP16-NEXT: vmov.32 d16[0], r0 +; CHECK-FP16-NEXT: vmov r0, s4 +; CHECK-FP16-NEXT: vmov.32 d17[1], r0 +; CHECK-FP16-NEXT: vmov r0, s2 +; CHECK-FP16-NEXT: vmov.32 d16[1], r0 +; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9 +; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9 +; CHECK-FP16-NEXT: bx lr +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll index 7b1331f3f1e8..f44ad2a896ad 100644 --- a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll +++ b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll @@ -6,12 +6,19 @@ ; RUN: llc <%s -mtriple=arm-none-eabi --frame-pointer=all 2>&1 \ ; RUN: | FileCheck %s -check-prefix=NO_FP_ELIM +; RUN: llc <%s -mtriple=armv6-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2 +; RUN: llc <%s -mtriple=armv6k-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2 +; RUN: llc <%s -mtriple=armv6k-apple-ios3 2>&1 | FileCheck %s -check-prefix=IOS3 +; RUN: llc <%s -mtriple=armv7-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS3 + ; CHECK: warning: inline asm clobber list contains reserved registers: SP, PC ; CHECK: 
warning: inline asm clobber list contains reserved registers: R11 ; RWPI: warning: inline asm clobber list contains reserved registers: R9, SP, PC ; RWPI: warning: inline asm clobber list contains reserved registers: R11 ; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11, SP, PC ; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11 +; IOS2: warning: inline asm clobber list contains reserved registers: R9, SP, PC +; IOS3: warning: inline asm clobber list contains reserved registers: SP, PC define void @foo() nounwind { call void asm sideeffect "mov r7, #1", diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll index c92839020f83..90142cbf6bff 100644 --- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll @@ -6,7 +6,7 @@ target triple = "armv7-eabi" declare void @bar1() define void @foo()#0 { -; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 
$d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 +; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpscr_rm $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 
$q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/ARM/issue159343.ll b/llvm/test/CodeGen/ARM/issue159343.ll new file mode 100644 index 000000000000..03292582918a --- /dev/null +++ b/llvm/test/CodeGen/ARM/issue159343.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s | FileCheck %s + +; Make sure there's no assertion from peephole-opt introducing illegal +; subregister index uses. 
+ +target triple = "thumbv7-unknown-linux-android29" + +define void @_ZN11VersionEdit10DecodeFromEv(i1 %call4, ptr %__profc__ZN11VersionEdit10DecodeFromEv) nounwind { +; CHECK-LABEL: _ZN11VersionEdit10DecodeFromEv: +; CHECK: @ %bb.0: @ %land.rhs.lr.ph +; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: beq .LBB0_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: adr r0, .LCPI0_0 +; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128] +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: @ %select.false +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: .LBB0_3: @ %select.end +; CHECK-NEXT: vldr s5, .LCPI0_1 +; CHECK-NEXT: vldr s4, .LCPI0_2 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vst1.64 {d2, d3}, [r1] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 0x00000000 @ float 0 +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .long 0x00000001 @ float 1.40129846E-45 +land.rhs.lr.ph: + br i1 %call4, label %sw.bb, label %while.cond.while.end_crit_edge.split.loop.exit43 + +while.cond.while.end_crit_edge.split.loop.exit43: ; preds = %land.rhs.lr.ph + %ext0 = extractelement <4 x i64> zeroinitializer, i64 0 + br label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: ; preds = %sw.bb, %while.cond.while.end_crit_edge.split.loop.exit43 + %pgocount5374.ph = phi i64 [ %ext1, %sw.bb ], [ %ext0, %while.cond.while.end_crit_edge.split.loop.exit43 ] + %ins = insertelement <2 x i64> splat (i64 1), i64 %pgocount5374.ph, i64 1 + store <2 x i64> %ins, ptr %__profc__ZN11VersionEdit10DecodeFromEv, align 8 + ret void + +sw.bb: ; preds = %land.rhs.lr.ph + %ext1 = extractelement <4 x i64> splat (i64 1), i64 0 + br label %while.cond.while.end_crit_edge +} + diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll index 749ee00a3c68..7274a8b0ce34 100644 
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll +++ b/llvm/test/CodeGen/ARM/llrint-conv.ll @@ -1,46 +1,80 @@ -; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP -; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT +; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -; SOFTFP-LABEL: testmsxh_builtin: -; SOFTFP: bl llrintf -; HARDFP-LABEL: testmsxh_builtin: -; HARDFP: bl llrintf define i64 @testmsxh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmsxh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: bl llrintf +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-NOFP16-LABEL: testmsxh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: bl llrintf +; CHECK-NOFP16-NEXT: pop {r11, pc} +; +; CHECK-FPv8-LABEL: testmsxh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; CHECK-FPv8-NEXT: .save {r11, lr} +; CHECK-FPv8-NEXT: push {r11, lr} +; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: bl llrintf +; CHECK-FPv8-NEXT: pop {r11, pc} +; +; CHECK-FP16-LABEL: testmsxh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: .save {r11, lr} +; CHECK-FP16-NEXT: push {r11, lr} +; 
CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FP16-NEXT: bl llrintf +; CHECK-FP16-NEXT: pop {r11, pc} entry: %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ret i64 %0 } -; SOFTFP-LABEL: testmsxs_builtin: -; SOFTFP: bl llrintf -; HARDFP-LABEL: testmsxs_builtin: -; HARDFP: bl llrintf define i64 @testmsxs_builtin(float %x) { +; CHECK-LABEL: testmsxs_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: pop {r11, pc} entry: %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } -; SOFTFP-LABEL: testmsxd_builtin: -; SOFTFP: bl llrint -; HARDFP-LABEL: testmsxd_builtin: -; HARDFP: bl llrint define i64 @testmsxd_builtin(double %x) { +; CHECK-LABEL: testmsxd_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llrint +; CHECK-NEXT: pop {r11, pc} entry: %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } -; FIXME(#44744): incorrect libcall -; SOFTFP-LABEL: testmsxq_builtin: -; SOFTFP: bl llrintl -; HARDFP-LABEL: testmsxq_builtin: -; HARDFP: bl llrintl define i64 @testmsxq_builtin(fp128 %x) { +; CHECK-LABEL: testmsxq_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: pop {r11, pc} entry: %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) ret i64 %0 } - -declare i64 @llvm.llrint.i64.f32(float) nounwind readnone -declare i64 @llvm.llrint.i64.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll index 0f57e4ab50a5..f734db89af2f 100644 --- a/llvm/test/CodeGen/ARM/llround-conv.ll +++ b/llvm/test/CodeGen/ARM/llround-conv.ll @@ -1,25 +1,71 @@ -; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP -; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT +; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 + +define i64 @testmsxh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmsxh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: bl llroundf +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-NOFP16-LABEL: testmsxh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: bl llroundf +; CHECK-NOFP16-NEXT: pop {r11, pc} +; +; CHECK-FP16-LABEL: testmsxh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: .save {r11, lr} +; CHECK-FP16-NEXT: push {r11, lr} +; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FP16-NEXT: bl llroundf +; CHECK-FP16-NEXT: pop {r11, pc} +entry: + %0 = tail call i64 @llvm.llround.i64.f16(half %x) + ret i64 %0 +} -; SOFTFP-LABEL: testmsxs_builtin: -; SOFTFP: bl llroundf -; HARDFP-LABEL: testmsxs_builtin: -; HARDFP: bl llroundf define i64 @testmsxs_builtin(float %x) { +; CHECK-LABEL: testmsxs_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llroundf +; CHECK-NEXT: pop {r11, pc} entry: - %0 = tail call i64 @llvm.llround.f32(float %x) + %0 = tail call i64 @llvm.llround.i64.f32(float %x) ret i64 %0 } -; SOFTFP-LABEL: testmsxd_builtin: -; SOFTFP: bl llround -; HARDFP-LABEL: testmsxd_builtin: -; HARDFP: bl llround define i64 @testmsxd_builtin(double %x) { +; CHECK-LABEL: 
testmsxd_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llround +; CHECK-NEXT: pop {r11, pc} entry: - %0 = tail call i64 @llvm.llround.f64(double %x) + %0 = tail call i64 @llvm.llround.i64.f64(double %x) ret i64 %0 } -declare i64 @llvm.llround.f32(float) nounwind readnone -declare i64 @llvm.llround.f64(double) nounwind readnone +define i64 @testmsxq_builtin(fp128 %x) { +; CHECK-LABEL: testmsxq_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl llroundl +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = tail call i64 @llvm.llround.i64.f128(fp128 %x) + ret i64 %0 +} diff --git a/llvm/test/CodeGen/ARM/llvm.exp10.ll b/llvm/test/CodeGen/ARM/llvm.exp10.ll index eb72fe8c1e1b..49397ca386cb 100644 --- a/llvm/test/CodeGen/ARM/llvm.exp10.ll +++ b/llvm/test/CodeGen/ARM/llvm.exp10.ll @@ -189,12 +189,13 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl exp10f +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov s17, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: vmov s16, r0 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: vmov s18, r6 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -207,7 +208,6 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; CHECK: @ %bb.0: ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: mov r4, r3 @@ -216,17 +216,15 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl exp10f -; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl exp10f -; CHECK-NEXT: vmov s18, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov s17, r7 ; CHECK-NEXT: bl exp10f 
-; CHECK-NEXT: vmov s16, r0 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} %r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %x) diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll index 376426d701b3..80972b75cf28 100644 --- a/llvm/test/CodeGen/ARM/llvm.frexp.ll +++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll @@ -362,33 +362,31 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) { define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) { ; CHECK-LABEL: test_frexp_v4f32_v4i32_only_use_fract: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r1, sp -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: bl frexpf +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: sub sp, #20 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: vmov s19, r0 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: add r1, sp, #8 -; CHECK-NEXT: vmov s18, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: add r1, sp, #12 -; CHECK-NEXT: vmov s17, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: vmov s16, r0 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: add r1, sp, #16 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: add sp, #20 +; CHECK-NEXT: pop {r4, r5, r6, r7, 
pc} %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) %result.0 = extractvalue { <4 x float>, <4 x i32> } %result, 0 ret <4 x float> %result.0 diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll index 9aa95112af53..2de234919a14 100644 --- a/llvm/test/CodeGen/ARM/lrint-conv.ll +++ b/llvm/test/CodeGen/ARM/lrint-conv.ll @@ -1,43 +1,70 @@ -; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP -; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT +; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -; FIXME: crash -; define i32 @testmswh_builtin(half %x) { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +define i32 @testmswh_builtin(half %x) { +; CHECK-SOFT-LABEL: testmswh_builtin: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: .save {r11, lr} +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: bl __aeabi_h2f +; CHECK-SOFT-NEXT: pop {r11, lr} +; CHECK-SOFT-NEXT: b lrintf +; +; CHECK-NOFP16-LABEL: testmswh_builtin: +; CHECK-NOFP16: @ %bb.0: @ %entry +; CHECK-NOFP16-NEXT: .save {r11, lr} +; CHECK-NOFP16-NEXT: push {r11, lr} +; CHECK-NOFP16-NEXT: vmov r0, s0 +; CHECK-NOFP16-NEXT: bl __aeabi_h2f +; CHECK-NOFP16-NEXT: vmov s0, r0 +; CHECK-NOFP16-NEXT: pop {r11, lr} +; CHECK-NOFP16-NEXT: b lrintf +; +; CHECK-FPv8-LABEL: testmswh_builtin: +; CHECK-FPv8: @ %bb.0: @ %entry +; 
CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-FPv8-NEXT: b lrintf +; +; CHECK-FP16-LABEL: testmswh_builtin: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vrintx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %0 +} -; SOFTFP-LABEL: testmsws_builtin: -; SOFTFP: bl lrintf -; HARDFP-LABEL: testmsws_builtin: -; HARDFP: bl lrintf define i32 @testmsws_builtin(float %x) { +; CHECK-LABEL: testmsws_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: b lrintf entry: %0 = tail call i32 @llvm.lrint.i32.f32(float %x) ret i32 %0 } -; SOFTFP-LABEL: testmswd_builtin: -; SOFTFP: bl lrint -; HARDFP-LABEL: testmswd_builtin: -; HARDFP: bl lrint define i32 @testmswd_builtin(double %x) { +; CHECK-LABEL: testmswd_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: b lrint entry: %0 = tail call i32 @llvm.lrint.i32.f64(double %x) ret i32 %0 } -; FIXME(#44744): incorrect libcall -; SOFTFP-LABEL: testmswq_builtin: -; SOFTFP: bl lrintl -; HARDFP-LABEL: testmswq_builtin: -; HARDFP: bl lrintl define i32 @testmswq_builtin(fp128 %x) { +; CHECK-LABEL: testmswq_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl lrintl +; CHECK-NEXT: pop {r11, pc} entry: %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x) ret i32 %0 } - -declare i32 @llvm.lrint.i32.f32(float) nounwind readnone -declare i32 @llvm.lrint.i32.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll index 3aaed74830b8..03f7a0d7a44c 100644 --- a/llvm/test/CodeGen/ARM/lround-conv.ll +++ b/llvm/test/CodeGen/ARM/lround-conv.ll @@ -1,25 +1,47 @@ -; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP -; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT +; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8 +; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 + +;define i32 @testmswh_builtin(half %x) { +;entry: +; %0 = tail call i32 @llvm.lround.i32.f16(half %x) +; ret i32 %0 +;} -; SOFTFP-LABEL: testmsws_builtin: -; SOFTFP: bl lroundf -; HARDFP-LABEL: testmsws_builtin: -; HARDFP: bl lroundf define i32 @testmsws_builtin(float %x) { +; CHECK-LABEL: testmsws_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: b lroundf entry: %0 = tail call i32 @llvm.lround.i32.f32(float %x) ret i32 %0 } -; SOFTFP-LABEL: testmswd_builtin: -; SOFTFP: bl lround -; HARDFP-LABEL: testmswd_builtin: -; HARDFP: bl lround define i32 @testmswd_builtin(double %x) { +; CHECK-LABEL: testmswd_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: b lround entry: %0 = tail call i32 @llvm.lround.i32.f64(double %x) ret i32 %0 } -declare i32 @llvm.lround.i32.f32(float) nounwind readnone -declare i32 @llvm.lround.i32.f64(double) nounwind readnone +define i32 @testmswq_builtin(fp128 %x) { +; CHECK-LABEL: testmswq_builtin: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl lroundl +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = tail call i32 @llvm.lround.i32.f128(fp128 %x) + ret i32 %0 +} + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-FP16: {{.*}} +; CHECK-FPv8: {{.*}} +; CHECK-NOFP16: {{.*}} +; CHECK-SOFT: {{.*}} diff --git a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir index 46f3e4b08559..17d66196505a 100644 --- a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir +++ b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir @@ -14,7 +14,7 @@ # CHECK: SU(1): %1:dpr = VABSD %0:dpr, 14, $noreg # CHECK: SU(2): %2:dpr = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool) # CHECK: SU(4): %3:rgpr = t2MOVi 0, 14, $noreg, $noreg -# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv +# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm # CHECK: SU(5): $r0 = COPY %3:rgpr --- name: test @@ -29,7 +29,7 @@ body: | %0:dpr = COPY $d0 %1:dpr = VABSD %0, 14 /* CC::al */, $noreg %2:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) - VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv + VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm %4:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg $r0 = COPY %4 tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 diff --git a/llvm/test/CodeGen/ARM/nnan-fsub.ll b/llvm/test/CodeGen/ARM/nnan-fsub.ll index 01839083547b..78dd36f95491 100644 --- a/llvm/test/CodeGen/ARM/nnan-fsub.ll +++ b/llvm/test/CodeGen/ARM/nnan-fsub.ll @@ -1,18 +1,22 @@ -; RUN: llc -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s -; RUN: llc -mcpu=cortex-a9 --enable-no-nans-fp-math < %s | FileCheck -check-prefix=FAST %s +; RUN: llc -mcpu=cortex-a9 < %s | FileCheck %s target triple = "armv7-apple-ios" -; SAFE: test -; FAST: test +; CHECK-LABEL: test define float @test(float %x, float %y) { entry: -; SAFE: vmul.f32 -; SAFE: vsub.f32 -; FAST: mov r0, #0 +; CHECK: vmul.f32 +; CHECK-NEXT: 
vsub.f32 %0 = fmul float %x, %y %1 = fsub float %0, %0 ret float %1 } - +; CHECK-LABEL: test_nnan +define float @test_nnan(float %x, float %y) { +entry: +; CHECK: mov r0, #0 + %0 = fmul float %x, %y + %1 = fsub nnan float %0, %0 + ret float %1 +} diff --git a/llvm/test/CodeGen/ARM/pr159343.mir b/llvm/test/CodeGen/ARM/pr159343.mir new file mode 100644 index 000000000000..9b71b1ad94b2 --- /dev/null +++ b/llvm/test/CodeGen/ARM/pr159343.mir @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -mtriple=thumbv7-unknown-linux-android29 %s -o - | FileCheck %s +--- +name: Test_shouldRewriteCopySrc_Invalid_SubReg +tracksRegLiveness: true +body: | + bb.1: + liveins: $r0, $r1 + + ; CHECK-LABEL: name: Test_shouldRewriteCopySrc_Invalid_SubReg + ; CHECK: liveins: $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:dpair = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr_vfp2 = COPY [[DEF]].dsub_0 + ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]].ssub_1 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mqpr = REG_SEQUENCE killed [[DEF2]], %subreg.ssub_0, killed [[DEF1]], %subreg.ssub_1, killed [[DEF3]], %subreg.ssub_2, [[COPY]].ssub_1, %subreg.ssub_3 + ; CHECK-NEXT: VST1q64 $r1, 0, killed [[REG_SEQUENCE]], 14 /* CC::al */, $noreg + %0:dpair = IMPLICIT_DEF + %1:dpr = COPY %0.dsub_0 + %2:gpr, %3:gpr = VMOVRRD killed %1, 14 /* CC::al */, $noreg + %4:spr = VMOVSR killed %3, 14 /* CC::al */, $noreg + %5:spr = IMPLICIT_DEF + %6:spr = IMPLICIT_DEF + %7:spr = IMPLICIT_DEF + %8:mqpr = REG_SEQUENCE killed %6, %subreg.ssub_0, killed %5, %subreg.ssub_1, killed %7, %subreg.ssub_2, killed %4, 
%subreg.ssub_3 + VST1q64 $r1, 0, killed %8, 14 /* CC::al */, $noreg +... diff --git a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll index e653aaa316fe..2bf8f29eccb4 100644 --- a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll +++ b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll @@ -12,8 +12,8 @@ define float @shouldRewriteCopySrc(double %arg) #0 { ; CHECK-NEXT: @APP ; CHECK-NEXT: nop ; CHECK-NEXT: @NO_APP -; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov.f64 d0, d16 +; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $d0 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr bb: diff --git a/llvm/test/CodeGen/ARM/sincos.ll b/llvm/test/CodeGen/ARM/sincos.ll index e1b683a8a665..1a4313e651d7 100644 --- a/llvm/test/CodeGen/ARM/sincos.ll +++ b/llvm/test/CodeGen/ARM/sincos.ll @@ -2,8 +2,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS ; RUN: llc < %s -mtriple=armv7-linux-gnu -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU -; RUN: llc < %s -mtriple=armv7-linux-android -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT-ANDROID -; RUN: llc < %s -mtriple=armv7-linux-android9 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU +; RUN: llc < %s -mtriple=armv7-linux-android -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU ; Combine sin / cos into a single call unless they may write errno (as ; captured by readnone attrbiute, controlled by clang -fmath-errno @@ -22,10 +21,6 @@ entry: ; NOOPT: bl _sinf ; NOOPT: bl _cosf -; NOOPT-ANDROID-LABEL: test1: -; NOOPT-ANDROID: bl sinf -; NOOPT-ANDROID: bl cosf - %call = tail call float @sinf(float %x) readnone %call1 = tail call float @cosf(float %x) readnone %add = fadd float %call, %call1 @@ -44,10 +39,6 @@ entry: ; NOOPT: bl _sinf ; NOOPT: bl _cosf -; 
NOOPT-ANDROID-LABEL: test1_fast: -; NOOPT-ANDROID: bl sinf -; NOOPT-ANDROID: bl cosf - %call = tail call fast float @sinf(float %x) readnone %call1 = tail call fast float @cosf(float %x) readnone %add = fadd float %call, %call1 @@ -68,10 +59,6 @@ entry: ; NOOPT: bl _sinf ; NOOPT: bl _cosf -; NOOPT-ANDROID-LABEL: test1_errno: -; NOOPT-ANDROID: bl sinf -; NOOPT-ANDROID: bl cosf - %call = tail call float @sinf(float %x) %call1 = tail call float @cosf(float %x) %add = fadd float %call, %call1 @@ -90,10 +77,6 @@ entry: ; NOOPT: bl _sin ; NOOPT: bl _cos -; NOOPT-ANDROID-LABEL: test2: -; NOOPT-ANDROID: bl sin -; NOOPT-ANDROID: bl cos - %call = tail call double @sin(double %x) readnone %call1 = tail call double @cos(double %x) readnone %add = fadd double %call, %call1 @@ -112,10 +95,6 @@ entry: ; NOOPT: bl _sin ; NOOPT: bl _cos -; NOOPT-ANDROID-LABEL: test2_fast: -; NOOPT-ANDROID: bl sin -; NOOPT-ANDROID: bl cos - %call = tail call fast double @sin(double %x) readnone %call1 = tail call fast double @cos(double %x) readnone %add = fadd double %call, %call1 @@ -136,10 +115,6 @@ entry: ; NOOPT: bl _sin ; NOOPT: bl _cos -; NOOPT-ANDROID-LABEL: test2_errno: -; NOOPT-ANDROID: bl sin -; NOOPT-ANDROID: bl cos - %call = tail call double @sin(double %x) %call1 = tail call double @cos(double %x) %add = fadd double %call, %call1 diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll index fe5e3cbcdf77..c3c88840b1a6 100644 --- a/llvm/test/CodeGen/ARM/vector-lrint.ll +++ b/llvm/test/CodeGen/ARM/vector-lrint.ll @@ -9,36 +9,1290 @@ ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64 -; FIXME: crash "Do not know how to soft promote this operator's operand!" 
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { -; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) -; ret <1 x iXLen> %a -; } -; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +; LE-I32-LABEL: lrint_v1f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_f2h +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_f2h +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_f2h +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_f2h +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) + ret <1 x iXLen> %a +} -; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { -; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) -; ret <2 x iXLen> %a -; } -; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +; LE-I32-LABEL: lrint_v2f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: 
.save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8} +; LE-I32-NEXT: vpush {d8} +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: vmov.f32 s16, s1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov r1, s16 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: mov r0, r1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: vpop {d8} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v2f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r11, lr} +; LE-I64-NEXT: push {r4, r5, r11, lr} +; LE-I64-NEXT: .vsave {d8, d9} +; LE-I64-NEXT: vpush {d8, d9} +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vmov.f32 s16, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d9[0], r4 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q4, q4 +; LE-I64-NEXT: vpop {d8, d9} +; LE-I64-NEXT: pop {r4, r5, r11, pc} +; +; BE-I32-LABEL: lrint_v2f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8} +; BE-I32-NEXT: vpush {d8} +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: vmov.f32 s16, s1 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov r1, s16 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: mov r0, r1 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 d0, d8 +; BE-I32-NEXT: vpop {d8} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v2f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r11, lr} 
+; BE-I64-NEXT: push {r4, r5, r11, lr} +; BE-I64-NEXT: .vsave {d8} +; BE-I64-NEXT: vpush {d8} +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s16, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d8[0], r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d8[1], r5 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d8 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: vpop {d8} +; BE-I64-NEXT: pop {r4, r5, r11, pc} + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) + ret <2 x iXLen> %a +} -; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { -; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) -; ret <4 x iXLen> %a -; } -; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +; LE-I32-LABEL: lrint_v4f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r11, lr} +; LE-I32-NEXT: push {r4, r5, r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEXT: vmov r0, s3 +; LE-I32-NEXT: vmov.f32 s16, s2 +; LE-I32-NEXT: vmov.f32 s18, s1 +; LE-I32-NEXT: vmov.f32 s20, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r4 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: 
vorr q0, q5, q5 +; LE-I32-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEXT: pop {r4, r5, r11, pc} +; +; LE-I64-LABEL: lrint_v4f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr} +; LE-I64-NEXT: .vsave {d12, d13} +; LE-I64-NEXT: vpush {d12, d13} +; LE-I64-NEXT: .vsave {d8, d9, d10} +; LE-I64-NEXT: vpush {d8, d9, d10} +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vmov.f32 s16, s3 +; LE-I64-NEXT: vmov.f32 s20, s2 +; LE-I64-NEXT: vmov.f32 s18, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vmov r0, s20 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d13[0], r5 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEXT: vmov.32 d9[1], r6 +; LE-I64-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vorr q1, q4, q4 +; LE-I64-NEXT: vpop {d8, d9, d10} +; LE-I64-NEXT: vpop {d12, d13} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; BE-I32-LABEL: lrint_v4f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r11, lr} +; BE-I32-NEXT: push {r4, r5, r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEXT: vmov r0, s3 +; BE-I32-NEXT: vmov.f32 s16, s2 +; BE-I32-NEXT: vmov.f32 s18, s1 +; BE-I32-NEXT: vmov.f32 s20, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov 
r0, s16 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s20 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: vmov r0, s18 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d11[1], r4 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEXT: pop {r4, r5, r11, pc} +; +; BE-I64-LABEL: lrint_v4f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10} +; BE-I64-NEXT: vpush {d8, d9, d10} +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s16, s3 +; BE-I64-NEXT: vmov.f32 s18, s2 +; BE-I64-NEXT: vmov.f32 s20, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s20 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d9[0], r5 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r6 +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d9 +; BE-I64-NEXT: vrev64.32 d3, d8 +; BE-I64-NEXT: vrev64.32 d0, d10 +; BE-I64-NEXT: vrev64.32 d2, d16 +; BE-I64-NEXT: vpop {d8, d9, d10} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc} 
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) + ret <4 x iXLen> %a +} -; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { -; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) -; ret <8 x iXLen> %a -; } -; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +; LE-I32-LABEL: lrint_v8f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: vmov r0, s7 +; LE-I32-NEXT: vmov.f32 s18, s6 +; LE-I32-NEXT: vmov.f32 s16, s5 +; LE-I32-NEXT: vmov.f32 s20, s4 +; LE-I32-NEXT: vmov.f32 s22, s3 +; LE-I32-NEXT: vmov.f32 s24, s2 +; LE-I32-NEXT: vmov.f32 s26, s1 +; LE-I32-NEXT: vmov.f32 s28, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: mov r8, r0 +; LE-I32-NEXT: vmov r0, s26 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r9, r0 +; LE-I32-NEXT: vmov r0, s22 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: vmov r0, s28 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: vmov r0, s24 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r4 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r7 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r6 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r9 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: 
vmov.32 d12[1], r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r8 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q1, q5, q5 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; +; LE-I64-LABEL: lrint_v8f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #8 +; LE-I64-NEXT: sub sp, sp, #8 +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s16, s7 +; LE-I64-NEXT: vmov.f32 s18, s5 +; LE-I64-NEXT: vmov.f32 s20, s4 +; LE-I64-NEXT: vmov.f32 s22, s3 +; LE-I64-NEXT: vmov.f32 s24, s2 +; LE-I64-NEXT: vmov.f32 s26, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r9, r0 +; LE-I64-NEXT: vmov r0, s26 +; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r10, r0 +; LE-I64-NEXT: vmov r0, s22 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s24 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: vmov r0, s20 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r4 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r6 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrintf +; 
LE-I64-NEXT: vmov s0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r5 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r10 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d9[0], r9 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov.32 d11[1], r11 +; LE-I64-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vorr q0, q4, q4 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: vorr q1, q7, q7 +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: vorr q2, q6, q6 +; LE-I64-NEXT: vorr q3, q5, q5 +; LE-I64-NEXT: add sp, sp, #8 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v8f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: vmov r0, s1 +; BE-I32-NEXT: vmov.f32 s18, s7 +; BE-I32-NEXT: vmov.f32 s20, s6 +; BE-I32-NEXT: vmov.f32 s16, s5 +; BE-I32-NEXT: vmov.f32 s22, s4 +; BE-I32-NEXT: vmov.f32 s24, s3 +; BE-I32-NEXT: vmov.f32 s26, s2 +; BE-I32-NEXT: vmov.f32 s28, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r8, r0 +; BE-I32-NEXT: vmov r0, s24 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov 
r9, r0 +; BE-I32-NEXT: vmov r0, s18 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: vmov r0, s26 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: vmov r0, s20 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov r0, s28 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s22 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r4 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r7 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r6 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r9 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: vmov r0, s16 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d12[1], r8 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q6 +; BE-I32-NEXT: vrev64.32 q1, q5 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; +; BE-I64-LABEL: lrint_v8f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: .pad #8 +; BE-I64-NEXT: sub sp, sp, #8 +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s18, s7 +; BE-I64-NEXT: vmov.f32 s16, s6 +; BE-I64-NEXT: vmov.f32 s20, s5 +; BE-I64-NEXT: vmov.f32 s22, s4 +; BE-I64-NEXT: vmov.f32 s24, s3 +; BE-I64-NEXT: vmov.f32 s26, s2 +; BE-I64-NEXT: vmov.f32 s28, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; 
BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: vmov r0, s28 +; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r10, r0 +; BE-I64-NEXT: vmov r0, s24 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s26 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s20 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s22 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r4 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r6 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r5 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r10 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d8[0], r9 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: vmov.32 d9[1], r11 +; BE-I64-NEXT: vmov.32 d14[1], r4 +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: vmov.32 d10[1], r8 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d8 +; BE-I64-NEXT: vrev64.32 d3, d13 +; BE-I64-NEXT: vrev64.32 d5, d11 +; BE-I64-NEXT: vrev64.32 d7, d9 +; BE-I64-NEXT: vrev64.32 d0, d14 
+; BE-I64-NEXT: vrev64.32 d2, d12 +; BE-I64-NEXT: vrev64.32 d4, d10 +; BE-I64-NEXT: vrev64.32 d6, d16 +; BE-I64-NEXT: add sp, sp, #8 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) + ret <8 x iXLen> %a +} -; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { -; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) -; ret <16 x iXLen> %a -; } -; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +; LE-I32-LABEL: lrint_v16f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #8 +; LE-I32-NEXT: sub sp, sp, #8 +; LE-I32-NEXT: vmov r0, s15 +; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill +; LE-I32-NEXT: vmov.f32 s26, s14 +; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill +; LE-I32-NEXT: vmov.f32 s20, s12 +; LE-I32-NEXT: vmov.f32 s22, s11 +; LE-I32-NEXT: vmov.f32 s18, s10 +; LE-I32-NEXT: vmov.f32 s17, s9 +; LE-I32-NEXT: vmov.f32 s24, s8 +; LE-I32-NEXT: vmov.f32 s19, s7 +; LE-I32-NEXT: vmov.f32 s30, s6 +; LE-I32-NEXT: vmov.f32 s21, s5 +; LE-I32-NEXT: vmov.f32 s16, s4 +; LE-I32-NEXT: vmov.f32 s23, s3 +; LE-I32-NEXT: vmov.f32 s28, s2 +; LE-I32-NEXT: vmov.f32 s25, s1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: mov r8, r0 +; LE-I32-NEXT: vmov r0, s17 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r9, r0 +; LE-I32-NEXT: vmov r0, s22 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r10, r0 +; LE-I32-NEXT: vmov r0, s21 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: vmov r0, s19 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r4, r0 +; 
LE-I32-NEXT: vmov r0, s25 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s23 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: vmov r0, s26 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vmov r0, s24 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: vmov r0, s30 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vmov r0, s28 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r6 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r4 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r7 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r10 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r9 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-I32-NEXT: vmov.32 d12[1], r0 +; 
LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r8 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vorr q2, q6, q6 +; LE-I32-NEXT: vorr q3, q5, q5 +; LE-I32-NEXT: add sp, sp, #8 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I64-LABEL: lrint_v16f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #120 +; LE-I64-NEXT: sub sp, sp, #120 +; LE-I64-NEXT: mov r11, r0 +; LE-I64-NEXT: vmov r0, s7 +; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s23, s13 +; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s25, s12 +; LE-I64-NEXT: vmov.f32 s27, s11 +; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill +; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s24, s8 +; LE-I64-NEXT: vmov.f32 s19, s6 +; LE-I64-NEXT: vmov.f32 s29, s5 +; LE-I64-NEXT: vmov.f32 s17, s4 +; LE-I64-NEXT: vmov.f32 s16, s3 +; LE-I64-NEXT: vmov.f32 s21, s2 +; LE-I64-NEXT: vmov.f32 s26, s1 +; LE-I64-NEXT: vmov.f32 s18, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s25 +; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s27 +; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r6, r0 +; 
LE-I64-NEXT: vmov r0, s29 +; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vmov r0, s23 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vmov.32 d17[0], r6 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: vmov r0, s17 +; LE-I64-NEXT: vmov r8, s21 +; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-I64-NEXT: vmov r10, s19 +; LE-I64-NEXT: vmov.32 d10[0], r5 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d11[0], r6 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d11[0], r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: vmov.32 d10[1], r7 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vmov r0, s26 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov r8, s24 +; LE-I64-NEXT: vmov.32 d14[1], r9 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov s24, r5 +; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: 
vmov r7, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: vmov s22, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vmov s24, r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: vmov s22, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-I64-NEXT: vmov s20, r0 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov r4, s0 +; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: vmov s16, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vmov s18, r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s18 +; LE-I64-NEXT: vmov s16, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vmov.32 d10[1], r4 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload 
+; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d19[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d21[1], r10 +; LE-I64-NEXT: vmov.32 d18[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d17[1], r0 +; LE-I64-NEXT: add r0, r11, #64 +; LE-I64-NEXT: vmov.32 d16[1], r1 +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vmov.32 d20[1], r9 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128] +; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-I64-NEXT: add sp, sp, #120 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v16f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #16 +; BE-I32-NEXT: sub sp, sp, #16 +; BE-I32-NEXT: vmov r0, s1 +; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s30, s15 +; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s17, s12 +; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s19, s11 +; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s21, s9 +; BE-I32-NEXT: vmov.f32 s23, s7 +; BE-I32-NEXT: vmov.f32 s24, s6 +; BE-I32-NEXT: vmov.f32 s25, s5 +; BE-I32-NEXT: vmov.f32 s26, s4 +; BE-I32-NEXT: vmov.f32 s27, s3 +; BE-I32-NEXT: vmov.f32 s28, s2 +; BE-I32-NEXT: vmov.f32 s29, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r8, r0 +; BE-I32-NEXT: vmov r0, s27 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r9, r0 +; BE-I32-NEXT: vmov r0, s25 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r10, r0 +; BE-I32-NEXT: vmov r0, s23 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: vmov r0, s21 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov r0, s19 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s30 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: vmov r0, s17 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[0], 
r0 +; BE-I32-NEXT: vmov r0, s29 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vmov r0, s28 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: vmov r0, s26 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: vmov r0, s24 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r6 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r4 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r7 +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r10 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r9 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d10[1], r8 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; 
BE-I32-NEXT: vrev64.32 q1, q7 +; BE-I32-NEXT: vrev64.32 q2, q6 +; BE-I32-NEXT: vrev64.32 q3, q4 +; BE-I32-NEXT: add sp, sp, #16 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I64-LABEL: lrint_v16f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #112 +; BE-I64-NEXT: sub sp, sp, #112 +; BE-I64-NEXT: mov r11, r0 +; BE-I64-NEXT: vmov r0, s14 +; BE-I64-NEXT: vmov.f32 s17, s15 +; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s21, s12 +; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s23, s11 +; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s19, s9 +; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s26, s8 +; BE-I64-NEXT: vmov.f32 s24, s6 +; BE-I64-NEXT: vmov.f32 s18, s5 +; BE-I64-NEXT: vmov.f32 s25, s3 +; BE-I64-NEXT: vmov.f32 s16, s2 +; BE-I64-NEXT: vmov.f32 s27, s1 +; BE-I64-NEXT: vmov.f32 s29, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r8, r0 +; BE-I64-NEXT: vmov r0, s29 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: vmov r0, s27 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s21 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s25 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s23 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r5 +; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; 
BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r6 +; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r9 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov r0, s17 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d10[0], r8 +; BE-I64-NEXT: vmov r6, s19 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: vmov.32 d10[1], r4 +; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: vmov.32 d11[1], r7 +; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.32 d15[1], r5 +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload +; BE-I64-NEXT: vmov r5, s26 +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s26, r4 +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d14[1], r10 +; BE-I64-NEXT: vmov r4, s24 +; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill +; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s26 +; BE-I64-NEXT: vmov s22, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s22 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; 
BE-I64-NEXT: vmov s24, r6 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s24 +; BE-I64-NEXT: vmov s22, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s22 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload +; BE-I64-NEXT: vmov s20, r0 +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: vmov r7, s0 +; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s20 +; BE-I64-NEXT: vmov s16, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov s18, r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s18 +; BE-I64-NEXT: vmov s16, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d24[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload +; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d23[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload +; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vmov.32 
d22[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload +; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d20, d26 +; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d27, d26 +; BE-I64-NEXT: vmov.32 d25[1], r0 +; BE-I64-NEXT: add r0, r11, #64 +; BE-I64-NEXT: vmov.32 d30[1], r8 +; BE-I64-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEXT: vrev64.32 d26, d28 +; BE-I64-NEXT: vrev64.32 d29, d10 +; BE-I64-NEXT: vmov.32 d24[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d12 +; BE-I64-NEXT: vrev64.32 d28, d23 +; BE-I64-NEXT: vrev64.32 d23, d22 +; BE-I64-NEXT: vrev64.32 d22, d30 +; BE-I64-NEXT: vrev64.32 d31, d25 +; BE-I64-NEXT: vrev64.32 d0, d9 +; BE-I64-NEXT: vrev64.32 d30, d24 +; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d19, d13 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]! +; BE-I64-NEXT: vrev64.32 d18, d14 +; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]! +; BE-I64-NEXT: vrev64.32 d17, d15 +; BE-I64-NEXT: vrev64.32 d16, d11 +; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]! 
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-I64-NEXT: add sp, sp, #112 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) + ret <16 x iXLen> %a +} define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; LE-I32-LABEL: lrint_v1f32: @@ -76,7 +1330,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) ret <1 x iXLen> %a } -declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; LE-I32-LABEL: lrint_v2f32: @@ -160,7 +1413,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) ret <2 x iXLen> %a } -declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; LE-I32-LABEL: lrint_v4f32: @@ -274,7 +1526,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) ret <4 x iXLen> %a } -declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; LE-I32-LABEL: lrint_v8f32: @@ -488,7 +1739,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) ret <8 x iXLen> %a } -declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; LE-I32-LABEL: lrint_v16f32: @@ -1005,7 +2255,6 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) ret <16 x iXLen> %a } -declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; LE-I32-LABEL: lrint_v1f64: @@ -1043,7 +2292,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { %a = call <1 x iXLen> 
@llvm.lrint.v1iXLen.v1f64(<1 x double> %x) ret <1 x iXLen> %a } -declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; LE-I32-LABEL: lrint_v2f64: @@ -1120,7 +2368,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) ret <2 x iXLen> %a } -declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; LE-I32-LABEL: lrint_v4f64: @@ -1237,7 +2484,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) ret <4 x iXLen> %a } -declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; LE-I32-LABEL: lrint_v8f64: @@ -1467,7 +2713,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) ret <8 x iXLen> %a } -declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { ; LE-I32-LABEL: lrint_v16f64: @@ -2053,7 +3298,6 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) ret <16 x iXLen> %a } -declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; LE-I32-LABEL: lrint_v1fp128: @@ -2091,7 +3335,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) ret <1 x iXLen> %a } -declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; LE-I32-LABEL: lrint_v2fp128: @@ -2194,7 +3437,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) ret <2 x iXLen> %a } -declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) define <4 x iXLen> @lrint_v4fp128(<4 x fp128> 
%x) { ; LE-I32-LABEL: lrint_v4fp128: @@ -2347,7 +3589,6 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) ret <4 x iXLen> %a } -declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; LE-I32-LABEL: lrint_v8fp128: @@ -2664,7 +3905,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) ret <8 x iXLen> %a } -declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { ; LE-I32-LABEL: lrint_v16fp128: @@ -3262,4 +4502,3 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) ret <16 x iXLen> %a } -declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir index 8fa9337eae6c..03cb8e37844c 100644 --- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir +++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir @@ -60,9 +60,9 @@ body: | $sp = t2STMDB_UPD $sp, 14, $noreg, $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11 $r4 = t2BICri $r4, 1, 14, $noreg, $noreg $sp = tSUBspi $sp, 34, 14, $noreg - VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15 + VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit undef $vpr, implicit undef $fpscr, implicit undef 
$fpscr_nzcv, implicit undef $fpscr_rm, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15 tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7 - VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15 + VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15 $sp = tADDspi $sp, 34, 14, $noreg $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11 $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc |
