summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir16
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir7
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir2
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir388
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir2
-rw-r--r--llvm/test/CodeGen/ARM/and-mask-variable.ll90
-rw-r--r--llvm/test/CodeGen/ARM/bf16_fast_math.ll18
-rw-r--r--llvm/test/CodeGen/ARM/build-attributes.ll319
-rw-r--r--llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll39
-rw-r--r--llvm/test/CodeGen/ARM/call-graph-section-assembly.ll63
-rw-r--r--llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll34
-rw-r--r--llvm/test/CodeGen/ARM/call-graph-section.ll37
-rw-r--r--llvm/test/CodeGen/ARM/carry.ll87
-rw-r--r--llvm/test/CodeGen/ARM/combine-movc-sub.ll12
-rw-r--r--llvm/test/CodeGen/ARM/cortex-m7-wideops.mir17
-rw-r--r--llvm/test/CodeGen/ARM/extract-bits.ll4591
-rw-r--r--llvm/test/CodeGen/ARM/extract-lowbits.ll2752
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-promote.ll50
-rw-r--r--llvm/test/CodeGen/ARM/fp16_fast_math.ll86
-rw-r--r--llvm/test/CodeGen/ARM/fpclamptosat.ll48
-rw-r--r--llvm/test/CodeGen/ARM/fpclamptosat_vec.ll107
-rw-r--r--llvm/test/CodeGen/ARM/inline-asm-clobber.ll7
-rw-r--r--llvm/test/CodeGen/ARM/ipra-reg-usage.ll2
-rw-r--r--llvm/test/CodeGen/ARM/issue159343.ll55
-rw-r--r--llvm/test/CodeGen/ARM/llrint-conv.ll78
-rw-r--r--llvm/test/CodeGen/ARM/llround-conv.ll74
-rw-r--r--llvm/test/CodeGen/ARM/llvm.exp10.ll16
-rw-r--r--llvm/test/CodeGen/ARM/llvm.frexp.ll36
-rw-r--r--llvm/test/CodeGen/ARM/lrint-conv.ll75
-rw-r--r--llvm/test/CodeGen/ARM/lround-conv.ll46
-rw-r--r--llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir4
-rw-r--r--llvm/test/CodeGen/ARM/nnan-fsub.ll20
-rw-r--r--llvm/test/CodeGen/ARM/pr159343.mir31
-rw-r--r--llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll4
-rw-r--r--llvm/test/CodeGen/ARM/sincos.ll27
-rw-r--r--llvm/test/CodeGen/ARM/vector-lrint.ll1321
-rw-r--r--llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir4
41 files changed, 9715 insertions, 858 deletions
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
index 77eeb34ef18c..4dd8af01f873 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -447,7 +447,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -477,7 +477,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls_reassociate
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -507,7 +507,7 @@ body: |
; CHECK-LABEL: name: test_vnmuld
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
- ; CHECK: [[VNMULD:%[0-9]+]]:dpr = VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULD:%[0-9]+]]:dpr = nofpexcept VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VNMULD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -539,7 +539,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMAS:%[0-9]+]]:spr = VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAS:%[0-9]+]]:spr = nofpexcept VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMAS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -573,7 +573,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = nofpexcept VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMAD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -607,7 +607,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFMSS:%[0-9]+]]:spr = VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSS:%[0-9]+]]:spr = nofpexcept VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -640,7 +640,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSD:%[0-9]+]]:dpr = nofpexcept VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFMSD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -673,7 +673,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMSS:%[0-9]+]]:spr = VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSS:%[0-9]+]]:spr = nofpexcept VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
index c8fee5d33442..7cbe5de22deb 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
@@ -119,9 +119,10 @@ body: |
; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]]
%2(s16) = G_CTLZ %1
- ; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]]
- ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]]
- ; CHECK: $r0 = COPY [[R]]
+ ; LIBCALLS: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]]
+ ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]]
+ ; LIBCALLS: $r0 = COPY [[R]]
+ ; CLZ: $r0 = COPY [[R32]]
%3(s32) = G_SEXT %2(s16)
$r0 = COPY %3(s32)
BX_RET 14, $noreg, implicit $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
index 45a846b5b877..4cded131190f 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
@@ -19,7 +19,7 @@ body: |
bb.1:
; CHECK-LABEL: name: test_fptosi
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
- ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
; CHECK: $r0 = COPY [[COPY1]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
index ec834f1233ac..4517fe6dd4f1 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -O0 -mtriple arm-- -mattr=+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2,+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
@@ -76,11 +77,9 @@ body: |
...
---
name: test_fadd_s32
-# CHECK-LABEL: name: test_fadd_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -89,28 +88,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fadd_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VADDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fadd_s64
-# CHECK-LABEL: name: test_fadd_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -119,28 +119,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fadd_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VADDD:%[0-9]+]]:dpr = nofpexcept VADDD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VADDD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fsub_s32
-# CHECK-LABEL: name: test_fsub_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -149,28 +150,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fsub_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VSUBS:%[0-9]+]]:spr = nofpexcept VSUBS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VSUBS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fsub_s64
-# CHECK-LABEL: name: test_fsub_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -179,28 +181,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fsub_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VSUBD:%[0-9]+]]:dpr = nofpexcept VSUBD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VSUBD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fmul_s32
-# CHECK-LABEL: name: test_fmul_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -209,28 +212,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fmul_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nofpexcept VMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VMULS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fmul_s64
-# CHECK-LABEL: name: test_fmul_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -239,28 +243,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fmul_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VMULD:%[0-9]+]]:dpr = nofpexcept VMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VMULD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fdiv_s32
-# CHECK-LABEL: name: test_fdiv_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -269,28 +274,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fdiv_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VDIVS:%[0-9]+]]:spr = nofpexcept VDIVS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VDIVS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fdiv_s64
-# CHECK-LABEL: name: test_fdiv_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -299,28 +305,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fdiv_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VDIVD:%[0-9]+]]:dpr = nofpexcept VDIVD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VDIVD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fneg_s32
-# CHECK-LABEL: name: test_fneg_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -328,25 +335,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fneg_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VNEGS:%[0-9]+]]:spr = VNEGS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VNEGS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fneg_s64
-# CHECK-LABEL: name: test_fneg_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -355,25 +363,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fneg_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VNEGD:%[0-9]+]]:dpr = VNEGD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VNEGD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fma_s32
-# CHECK-LABEL: name: test_fma_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -383,31 +392,32 @@ body: |
bb.0:
liveins: $s0, $s1, $s2
+ ; CHECK-LABEL: name: test_fma_s32
+ ; CHECK: liveins: $s0, $s1, $s2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spr = COPY $s2
+ ; CHECK-NEXT: [[VFMAS:%[0-9]+]]:spr = nofpexcept VFMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VFMAS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = COPY $s2
- ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY $s2
%3(s32) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %3(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fma_s64
-# CHECK-LABEL: name: test_fma_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -417,31 +427,32 @@ body: |
bb.0:
liveins: $d0, $d1, $d2
+ ; CHECK-LABEL: name: test_fma_s64
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: [[VFMAD:%[0-9]+]]:dpr = nofpexcept VFMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VFMAD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = COPY $d2
- ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY $d2
%3(s64) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %3(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fpext_s32_to_s64
-# CHECK-LABEL: name: test_fpext_s32_to_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -449,25 +460,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fpext_s32_to_s64
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VCVTDS:%[0-9]+]]:dpr = nofpexcept VCVTDS [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VCVTDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s64) = G_FPEXT %0(s32)
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fptrunc_s64_to_s32
-# CHECK-LABEL: name: test_fptrunc_s64_to_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -475,25 +487,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptrunc_s64_to_s32
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VCVTSD:%[0-9]+]]:spr = nofpexcept VCVTSD [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VCVTSD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTRUNC %0(s64)
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fptosi_s32
-# CHECK-LABEL: name: test_fptosi_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -501,26 +514,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptosi_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOSI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptosi_s64
-# CHECK-LABEL: name: test_fptosi_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -528,26 +542,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptosi_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOSIZD:%[0-9]+]]:spr = nofpexcept VTOSIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOSI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s32
-# CHECK-LABEL: name: test_fptoui_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -555,26 +570,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptoui_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOUIZS:%[0-9]+]]:spr = nofpexcept VTOUIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOUI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s64
-# CHECK-LABEL: name: test_fptoui_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -582,26 +598,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptoui_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOUIZD:%[0-9]+]]:spr = nofpexcept VTOUIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOUI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_sitofp_s32
-# CHECK-LABEL: name: test_sitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -609,26 +626,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOS:%[0-9]+]]:spr = nofpexcept VSITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VSITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VSITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_sitofp_s64
-# CHECK-LABEL: name: test_sitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -636,26 +654,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOD:%[0-9]+]]:dpr = nofpexcept VSITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VSITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VSITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_uitofp_s32
-# CHECK-LABEL: name: test_uitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -663,26 +682,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOS:%[0-9]+]]:spr = nofpexcept VUITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VUITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VUITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_uitofp_s64
-# CHECK-LABEL: name: test_uitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -690,26 +710,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOD:%[0-9]+]]:dpr = nofpexcept VUITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VUITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VUITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_load_f32
-# CHECK-LABEL: name: test_load_f32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -717,25 +738,26 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: $s0 = COPY [[VLDRS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s32) = G_LOAD %0(p0) :: (load (s32))
- ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg
$s0 = COPY %1
- ; CHECK: $s0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_load_f64
-# CHECK-LABEL: name: test_load_f64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -743,45 +765,50 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRD:%[0-9]+]]:dpr = VLDRD [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s64))
+ ; CHECK-NEXT: $d0 = COPY [[VLDRD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s64) = G_LOAD %0(p0) :: (load (s64))
- ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg
$d0 = COPY %1
- ; CHECK: $d0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_stores
-# CHECK-LABEL: name: test_stores
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
- { id: 2, class: fprb }
-# CHECK: id: [[P:[0-9]+]], class: gpr
-# CHECK: id: [[F32:[0-9]+]], class: spr
-# CHECK: id: [[F64:[0-9]+]], class: dpr
body: |
bb.0:
liveins: $r0, $s0, $d0
+ ; CHECK-LABEL: name: test_stores
+ ; CHECK: liveins: $r0, $s0, $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: VSTRS [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32))
+ ; CHECK-NEXT: VSTRD [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s64))
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
%0(p0) = COPY $r0
%1(s32) = COPY $s0
%2(s64) = COPY $d2
G_STORE %1(s32), %0(p0) :: (store (s32))
- ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg
G_STORE %2(s64), %0(p0) :: (store (s64))
- ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg
BX_RET 14, $noreg
...
@@ -833,11 +860,9 @@ body: |
...
---
name: test_soft_fp_double
-# CHECK-LABEL: name: test_soft_fp_double
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: gprb }
@@ -848,24 +873,27 @@ body: |
bb.0:
liveins: $r0, $r1, $r2, $r3
+ ; CHECK-LABEL: name: test_soft_fp_double
+ ; CHECK: liveins: $r0, $r1, $r2, $r3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r3
+ ; CHECK-NEXT: [[VMOVDRR:%[0-9]+]]:dpr = VMOVDRR [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[VMOVDRR]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $r0 = COPY [[VMOVRRD]]
+ ; CHECK-NEXT: $r1 = COPY [[VMOVRRD1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
%0(s32) = COPY $r2
- ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY $r2
%1(s32) = COPY $r3
- ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY $r3
%2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
- ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]]
%3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
- ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]]
$r0 = COPY %3
- ; CHECK: $r0 = COPY [[OUT1]]
$r1 = COPY %4
- ; CHECK: $r1 = COPY [[OUT2]]
BX_RET 14, $noreg, implicit $r0, implicit $r1
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
index a6fc4dad49fd..fa982d8a60d7 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
@@ -31,7 +31,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = nofpexcept VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMSD]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
%0:fprb(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/and-mask-variable.ll b/llvm/test/CodeGen/ARM/and-mask-variable.ll
new file mode 100644
index 000000000000..0f84b76f97a6
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/and-mask-variable.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
+; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
+; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
+; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; V7M-LABEL: mask_pair:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: mask_pair:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: mask_pair:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: mask_pair:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: bx lr
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; V7M-LABEL: mask_pair_64:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r12, r3, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r2
+; V7M-NEXT: and.w r0, r0, r12
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: mask_pair_64:
+; V7A: @ %bb.0:
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: and r0, r2, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: mask_pair_64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r12, r3, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r2
+; V7A-T-NEXT: and.w r0, r0, r12
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: mask_pair_64:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
diff --git a/llvm/test/CodeGen/ARM/bf16_fast_math.ll b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
index 1b18ea6feb2e..5f7e1e69d99d 100644
--- a/llvm/test/CodeGen/ARM/bf16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
@@ -17,7 +17,7 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -44,7 +44,7 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -71,7 +71,7 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -102,7 +102,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -113,7 +113,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
@@ -142,10 +142,10 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[MOVsi2:%[0-9]+]]:gpr = MOVsi [[COPY]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -174,7 +174,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -185,7 +185,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
diff --git a/llvm/test/CodeGen/ARM/build-attributes.ll b/llvm/test/CodeGen/ARM/build-attributes.ll
index 68844aed0363..306a4a31b79f 100644
--- a/llvm/test/CodeGen/ARM/build-attributes.ll
+++ b/llvm/test/CodeGen/ARM/build-attributes.ll
@@ -3,23 +3,16 @@
; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale -mattr=+strict-align | FileCheck %s --check-prefix=XSCALE
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6-FAST
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
-; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align | FileCheck %s --check-prefix=ARM1156T2F-S
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M
-; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7M-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V8-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -31,35 +24,24 @@
; RUN: llc < %s -mtriple=thumbv8m.main-linux-gnueabi | FileCheck %s --check-prefix=V8MMAINLINE
; RUN: llc < %s -mtriple=thumbv8m.main-linux-gnueabi -mattr=+dsp | FileCheck %s --check-prefix=V8MMAINLINE_DSP
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,-d32 | FileCheck %s --check-prefix=CORTEX-A5-NONEON
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A5-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-NOFPU-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A8-SOFT
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A8-SOFT-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A8-HARD
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A8-HARD-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A8-SOFT
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-SOFT-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-HARD-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A12-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-NOFPU-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A15-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 | FileCheck %s --check-prefix=CORTEX-A17-DEFAULT
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-no-trapping-fp-math | FileCheck %s --check-prefix=NO-TRAPPING-MATH
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -denormal-fp-math=ieee | FileCheck %s --check-prefix=DENORMAL-IEEE
@@ -74,37 +56,26 @@
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus | FileCheck %s --check-prefix=CORTEX-M0PLUS
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0PLUS-FAST
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 | FileCheck %s --check-prefix=CORTEX-M1
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M1-FAST
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align | FileCheck %s --check-prefix=SC000
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC000-FAST
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3
-; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M3-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 | FileCheck %s --check-prefix=SC300
-; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC300-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT
-; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-SOFT-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
-; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-HARD-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2sp -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-NOFPU-FAST
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=CORTEX-M23
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CORTEX-M33
-; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M33-FAST
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m35p | FileCheck %s --check-prefix=CORTEX-M35P
@@ -113,49 +84,34 @@
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
-; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R5-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 | FileCheck %s --check-prefix=CORTEX-R7
-; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R7-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 | FileCheck %s --check-prefix=CORTEX-R8
-; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R8-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r8 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 | FileCheck %s --check-prefix=CORTEX-A32
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A32-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a32 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 | FileCheck %s --check-prefix=CORTEX-A35
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A35-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A53-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A57-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a73 | FileCheck %s --check-prefix=CORTEX-A73
; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 | FileCheck %s --check-prefix=EXYNOS-M3
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 | FileCheck %s --check-prefix=EXYNOS-M4
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 | FileCheck %s --check-prefix=EXYNOS-M5
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=EXYNOS-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=exynos-m5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST
; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-CHECK-FAST
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2sp,-vfp3,-vfp4,-neon,-fp16 | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2sp,-vfp3,-vfp4,-neon,-fp16 -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,-d32,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
@@ -278,15 +234,6 @@
; V6-NOT: .eabi_attribute 28
; V6: .eabi_attribute 38, 1
-; V6-FAST-NOT: .eabi_attribute 19
-;; Despite the V6 CPU having no FPU by default, we chose to flush to
-;; positive zero here. There's no hardware support doing this, but the
-;; fast maths software library might.
-; V6-FAST-NOT: .eabi_attribute 20
-; V6-FAST-NOT: .eabi_attribute 21
-; V6-FAST-NOT: .eabi_attribute 22
-; V6-FAST: .eabi_attribute 23, 1
-
;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for
;; V6-M, however we don't model the OS extension so this is fine.
; V6M: .eabi_attribute 6, 12
@@ -312,14 +259,6 @@
; V6M-NOT: .eabi_attribute 28
; V6M: .eabi_attribute 38, 1
-; V6M-FAST-NOT: .eabi_attribute 19
-;; Despite the V6M CPU having no FPU by default, we chose to flush to
-;; positive zero here. There's no hardware support doing this, but the
-;; fast maths software library might.
-; V6M-FAST-NOT: .eabi_attribute 20
-; V6M-FAST-NOT: .eabi_attribute 21
-; V6M-FAST-NOT: .eabi_attribute 22
-; V6M-FAST: .eabi_attribute 23, 1
; ARM1156T2F-S: .cpu arm1156t2f-s
; ARM1156T2F-S: .eabi_attribute 6, 8
@@ -342,14 +281,6 @@
; ARM1156T2F-S-NOT: .eabi_attribute 28
; ARM1156T2F-S: .eabi_attribute 38, 1
-; ARM1156T2F-S-FAST-NOT: .eabi_attribute 19
-;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally
-;; valid for this core, it's an implementation defined question as to which of 0 and 2 you
-;; select. LLVM historically picks 0.
-; ARM1156T2F-S-FAST-NOT: .eabi_attribute 20
-; ARM1156T2F-S-FAST-NOT: .eabi_attribute 21
-; ARM1156T2F-S-FAST-NOT: .eabi_attribute 22
-; ARM1156T2F-S-FAST: .eabi_attribute 23, 1
; V7M: .eabi_attribute 6, 10
; V7M: .eabi_attribute 7, 77
@@ -374,15 +305,6 @@
; V7M-NOT: .eabi_attribute 28
; V7M: .eabi_attribute 38, 1
-; V7M-FAST-NOT: .eabi_attribute 19
-;; Despite the V7M CPU having no FPU by default, we chose to flush
-;; preserving sign. This matches what the hardware would do in the
-;; architecture revision were to exist on the current target.
-; V7M-FAST: .eabi_attribute 20, 2
-; V7M-FAST-NOT: .eabi_attribute 21
-; V7M-FAST-NOT: .eabi_attribute 22
-; V7M-FAST: .eabi_attribute 23, 1
-
; V7: .syntax unified
; V7: .eabi_attribute 6, 10
; V7-NOT: .eabi_attribute 27
@@ -401,13 +323,6 @@
; V7-NOT: .eabi_attribute 28
; V7: .eabi_attribute 38, 1
-; V7-FAST-NOT: .eabi_attribute 19
-;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes
-;; denormals to zero preserving the sign.
-; V7-FAST: .eabi_attribute 20, 2
-; V7-FAST-NOT: .eabi_attribute 21
-; V7-FAST-NOT: .eabi_attribute 22
-; V7-FAST: .eabi_attribute 23, 1
; V7VE: .syntax unified
; V7VE: .eabi_attribute 6, 10 @ Tag_CPU_arch
@@ -435,12 +350,6 @@
; V8-NOT: .eabi_attribute 22
; V8: .eabi_attribute 23, 3
-; V8-FAST-NOT: .eabi_attribute 19
-;; The default does have an FPU, and for V8-A, it flushes preserving sign.
-; V8-FAST: .eabi_attribute 20, 2
-; V8-FAST-NOT: .eabi_attribute 21
-; V8-FAST-NOT: .eabi_attribute 22
-; V8-FAST: .eabi_attribute 23, 1
; Vt8: .syntax unified
; Vt8: .eabi_attribute 6, 14
@@ -552,15 +461,11 @@
;; We default to IEEE 754 compliance
; CORTEX-A7-CHECK: .eabi_attribute 20, 1
;; The A7 has VFPv3 support by default, so flush preserving sign.
-; CORTEX-A7-CHECK-FAST: .eabi_attribute 20, 2
; CORTEX-A7-NOFPU: .eabi_attribute 20, 1
;; Despite there being no FPU, we chose to flush to zero preserving
;; sign. This matches what the hardware would do for this architecture
;; revision.
-; CORTEX-A7-NOFPU-FAST: .eabi_attribute 20, 2
; CORTEX-A7-FPUV4: .eabi_attribute 20, 1
-;; The VFPv4 FPU flushes preserving sign.
-; CORTEX-A7-FPUV4-FAST: .eabi_attribute 20, 2
; Tag_ABI_FP_exceptions
; CORTEX-A7-CHECK: .eabi_attribute 21, 1
@@ -610,13 +515,6 @@
; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 19
-;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math
-;; is given.
-; CORTEX-A5-DEFAULT-FAST: .eabi_attribute 20, 2
-; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 21
-; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 22
-; CORTEX-A5-DEFAULT-FAST: .eabi_attribute 23, 1
; CORTEX-A5-NONEON: .cpu cortex-a5
; CORTEX-A5-NONEON: .eabi_attribute 6, 10
@@ -634,13 +532,6 @@
; CORTEX-A5-NONEON: .eabi_attribute 24, 1
; CORTEX-A5-NONEON: .eabi_attribute 25, 1
-; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 19
-;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math
-;; is given.
-; CORTEX-A5-NONEON-FAST: .eabi_attribute 20, 2
-; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 21
-; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 22
-; CORTEX-A5-NONEON-FAST: .eabi_attribute 23, 1
; CORTEX-A5-NOFPU: .cpu cortex-a5
; CORTEX-A5-NOFPU: .eabi_attribute 6, 10
@@ -659,14 +550,9 @@
; CORTEX-A5-NOFPU: .eabi_attribute 24, 1
; CORTEX-A5-NOFPU: .eabi_attribute 25, 1
-; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 19
;; Despite there being no FPU, we chose to flush to zero preserving
;; sign. This matches what the hardware would do for this architecture
;; revision.
-; CORTEX-A5-NOFPU-FAST: .eabi_attribute 20, 2
-; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 21
-; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 22
-; CORTEX-A5-NOFPU-FAST: .eabi_attribute 23, 1
; CORTEX-A8-SOFT: .cpu cortex-a8
; CORTEX-A8-SOFT: .eabi_attribute 6, 10
@@ -712,15 +598,6 @@
; CORTEX-A9-SOFT-NOT: .eabi_attribute 28
; CORTEX-A9-SOFT: .eabi_attribute 38, 1
-; CORTEX-A8-SOFT-FAST-NOT: .eabi_attribute 19
-; CORTEX-A9-SOFT-FAST-NOT: .eabi_attribute 19
-;; The A9 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A8-SOFT-FAST: .eabi_attribute 20, 2
-; CORTEX-A9-SOFT-FAST: .eabi_attribute 20, 2
-; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 21
-; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 22
-; CORTEX-A5-SOFT-FAST: .eabi_attribute 23, 1
; CORTEX-A8-HARD: .cpu cortex-a8
; CORTEX-A8-HARD: .eabi_attribute 6, 10
@@ -766,21 +643,6 @@
; CORTEX-A9-HARD: .eabi_attribute 28, 1
; CORTEX-A9-HARD: .eabi_attribute 38, 1
-; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 19
-;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A8-HARD-FAST: .eabi_attribute 20, 2
-; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 21
-; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 22
-; CORTEX-A8-HARD-FAST: .eabi_attribute 23, 1
-
-; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 19
-;; The A9 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A9-HARD-FAST: .eabi_attribute 20, 2
-; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 21
-; CORTEX-A9-HARD-FAST-NOT: .eabi_attribute 22
-; CORTEX-A9-HARD-FAST: .eabi_attribute 23, 1
; CORTEX-A12-DEFAULT: .cpu cortex-a12
; CORTEX-A12-DEFAULT: .eabi_attribute 6, 10
@@ -800,13 +662,6 @@
; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A12-DEFAULT-FAST-NOT: .eabi_attribute 19
-;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A12-DEFAULT-FAST: .eabi_attribute 20, 2
-; CORTEX-A12-HARD-FAST-NOT: .eabi_attribute 21
-; CORTEX-A12-HARD-FAST-NOT: .eabi_attribute 22
-; CORTEX-A12-HARD-FAST: .eabi_attribute 23, 1
; CORTEX-A12-NOFPU: .cpu cortex-a12
; CORTEX-A12-NOFPU: .eabi_attribute 6, 10
@@ -826,14 +681,6 @@
; CORTEX-A12-NOFPU: .eabi_attribute 24, 1
; CORTEX-A12-NOFPU: .eabi_attribute 25, 1
-; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 19
-;; Despite there being no FPU, we chose to flush to zero preserving
-;; sign. This matches what the hardware would do for this architecture
-;; revision.
-; CORTEX-A12-NOFPU-FAST: .eabi_attribute 20, 2
-; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 21
-; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 22
-; CORTEX-A12-NOFPU-FAST: .eabi_attribute 23, 1
; CORTEX-A15: .cpu cortex-a15
; CORTEX-A15: .eabi_attribute 6, 10
@@ -857,13 +704,6 @@
; CORTEX-A15-NOT: .eabi_attribute 28
; CORTEX-A15: .eabi_attribute 38, 1
-; CORTEX-A15-FAST-NOT: .eabi_attribute 19
-;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A15-FAST: .eabi_attribute 20, 2
-; CORTEX-A15-FAST-NOT: .eabi_attribute 21
-; CORTEX-A15-FAST-NOT: .eabi_attribute 22
-; CORTEX-A15-FAST: .eabi_attribute 23, 1
; CORTEX-A17-DEFAULT: .cpu cortex-a17
; CORTEX-A17-DEFAULT: .eabi_attribute 6, 10
@@ -883,13 +723,6 @@
; CORTEX-A17-DEFAULT: .eabi_attribute 24, 1
; CORTEX-A17-DEFAULT: .eabi_attribute 25, 1
-; CORTEX-A17-FAST-NOT: .eabi_attribute 19
-;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-A17-FAST: .eabi_attribute 20, 2
-; CORTEX-A17-FAST-NOT: .eabi_attribute 21
-; CORTEX-A17-FAST-NOT: .eabi_attribute 22
-; CORTEX-A17-FAST: .eabi_attribute 23, 1
; CORTEX-A17-NOFPU: .cpu cortex-a17
; CORTEX-A17-NOFPU: .eabi_attribute 6, 10
@@ -910,13 +743,6 @@
; CORTEX-A17-NOFPU: .eabi_attribute 25, 1
; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19
-;; Despite there being no FPU, we chose to flush to zero preserving
-;; sign. This matches what the hardware would do for this architecture
-;; revision.
-; CORTEX-A17-NOFPU-FAST: .eabi_attribute 20, 2
-; CORTEX-A17-NOFPU-FAST-NOT: .eabi_attribute 21
-; CORTEX-A17-NOFPU-FAST-NOT: .eabi_attribute 22
-; CORTEX-A17-NOFPU-FAST: .eabi_attribute 23, 1
; Test flags -enable-no-trapping-fp-math and -denormal-fp-math:
; NO-TRAPPING-MATH: .eabi_attribute 21, 0
@@ -946,16 +772,6 @@
; CORTEX-M0-NOT: .eabi_attribute 28
; CORTEX-M0: .eabi_attribute 38, 1
-; CORTEX-M0-FAST-NOT: .eabi_attribute 19
-;; Despite the M0 CPU having no FPU in this scenario, we chose to
-;; flush to positive zero here. There's no hardware support doing
-;; this, but the fast maths software library might and such behaviour
-;; would match hardware support on this architecture revision if it
-;; existed.
-; CORTEX-M0-FAST-NOT: .eabi_attribute 20
-; CORTEX-M0-FAST-NOT: .eabi_attribute 21
-; CORTEX-M0-FAST-NOT: .eabi_attribute 22
-; CORTEX-M0-FAST: .eabi_attribute 23, 1
; CORTEX-M0PLUS: .cpu cortex-m0plus
; CORTEX-M0PLUS: .eabi_attribute 6, 12
@@ -978,16 +794,6 @@
; CORTEX-M0PLUS-NOT: .eabi_attribute 28
; CORTEX-M0PLUS: .eabi_attribute 38, 1
-; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19
-;; Despite the M0+ CPU having no FPU in this scenario, we chose to
-;; flush to positive zero here. There's no hardware support doing
-;; this, but the fast maths software library might and such behaviour
-;; would match hardware support on this architecture revision if it
-;; existed.
-; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 20
-; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 21
-; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 22
-; CORTEX-M0PLUS-FAST: .eabi_attribute 23, 1
; CORTEX-M1: .cpu cortex-m1
; CORTEX-M1: .eabi_attribute 6, 12
@@ -1010,16 +816,6 @@
; CORTEX-M1-NOT: .eabi_attribute 28
; CORTEX-M1: .eabi_attribute 38, 1
-; CORTEX-M1-FAST-NOT: .eabi_attribute 19
-;; Despite the M1 CPU having no FPU in this scenario, we chose to
-;; flush to positive zero here. There's no hardware support doing
-;; this, but the fast maths software library might and such behaviour
-;; would match hardware support on this architecture revision if it
-;; existed.
-; CORTEX-M1-FAST-NOT: .eabi_attribute 20
-; CORTEX-M1-FAST-NOT: .eabi_attribute 21
-; CORTEX-M1-FAST-NOT: .eabi_attribute 22
-; CORTEX-M1-FAST: .eabi_attribute 23, 1
; SC000: .cpu sc000
; SC000: .eabi_attribute 6, 12
@@ -1041,16 +837,6 @@
; SC000-NOT: .eabi_attribute 28
; SC000: .eabi_attribute 38, 1
-; SC000-FAST-NOT: .eabi_attribute 19
-;; Despite the SC000 CPU having no FPU in this scenario, we chose to
-;; flush to positive zero here. There's no hardware support doing
-;; this, but the fast maths software library might and such behaviour
-;; would match hardware support on this architecture revision if it
-;; existed.
-; SC000-FAST-NOT: .eabi_attribute 20
-; SC000-FAST-NOT: .eabi_attribute 21
-; SC000-FAST-NOT: .eabi_attribute 22
-; SC000-FAST: .eabi_attribute 23, 1
; CORTEX-M3: .cpu cortex-m3
; CORTEX-M3: .eabi_attribute 6, 10
@@ -1073,14 +859,6 @@
; CORTEX-M3-NOT: .eabi_attribute 28
; CORTEX-M3: .eabi_attribute 38, 1
-; CORTEX-M3-FAST-NOT: .eabi_attribute 19
-;; Despite there being no FPU, we chose to flush to zero preserving
-;; sign. This matches what the hardware would do for this architecture
-;; revision.
-; CORTEX-M3-FAST: .eabi_attribute 20, 2
-; CORTEX-M3-FAST-NOT: .eabi_attribute 21
-; CORTEX-M3-FAST-NOT: .eabi_attribute 22
-; CORTEX-M3-FAST: .eabi_attribute 23, 1
; SC300: .cpu sc300
; SC300: .eabi_attribute 6, 10
@@ -1103,14 +881,6 @@
; SC300-NOT: .eabi_attribute 28
; SC300: .eabi_attribute 38, 1
-; SC300-FAST-NOT: .eabi_attribute 19
-;; Despite there being no FPU, we chose to flush to zero preserving
-;; sign. This matches what the hardware would do for this architecture
-;; revision.
-; SC300-FAST: .eabi_attribute 20, 2
-; SC300-FAST-NOT: .eabi_attribute 21
-; SC300-FAST-NOT: .eabi_attribute 22
-; SC300-FAST: .eabi_attribute 23, 1
; CORTEX-M4-SOFT: .cpu cortex-m4
; CORTEX-M4-SOFT: .eabi_attribute 6, 13
@@ -1134,13 +904,6 @@
; CORTEX-M4-SOFT-NOT: .eabi_attribute 28
; CORTEX-M4-SOFT: .eabi_attribute 38, 1
-; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 19
-;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-M4-SOFT-FAST: .eabi_attribute 20, 2
-; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 21
-; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 22
-; CORTEX-M4-SOFT-FAST: .eabi_attribute 23, 1
; CORTEX-M4-HARD: .cpu cortex-m4
; CORTEX-M4-HARD: .eabi_attribute 6, 13
@@ -1164,13 +927,6 @@
; CORTEX-M4-HARD: .eabi_attribute 28, 1
; CORTEX-M4-HARD: .eabi_attribute 38, 1
-; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 19
-;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
-;; -ffast-math is specified.
-; CORTEX-M4-HARD-FAST: .eabi_attribute 20, 2
-; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 21
-; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 22
-; CORTEX-M4-HARD-FAST: .eabi_attribute 23, 1
; CORTEX-M7: .cpu cortex-m7
; CORTEX-M7: .eabi_attribute 6, 13
@@ -1197,16 +953,6 @@
; CORTEX-M7: .eabi_attribute 38, 1
; CORTEX-M7: .eabi_attribute 14, 0
-; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19
-;; The M7 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-M7-FAST: .eabi_attribute 20, 2
-;; Despite there being no FPU, we chose to flush to zero preserving
-;; sign. This matches what the hardware would do for this architecture
-;; revision.
-; CORTEX-M7-NOFPU-FAST: .eabi_attribute 20, 2
-; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 21
-; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 22
-; CORTEX-M7-NOFPU-FAST: .eabi_attribute 23, 1
; CORTEX-R4: .cpu cortex-r4
; CORTEX-R4: .eabi_attribute 6, 10
@@ -1273,12 +1019,6 @@
; CORTEX-R5-NOT: .eabi_attribute 28
; CORTEX-R5: .eabi_attribute 38, 1
-; CORTEX-R5-FAST-NOT: .eabi_attribute 19
-;; The R5 has the VFPv3 FP unit, which always flushes preserving sign.
-; CORTEX-R5-FAST: .eabi_attribute 20, 2
-; CORTEX-R5-FAST-NOT: .eabi_attribute 21
-; CORTEX-R5-FAST-NOT: .eabi_attribute 22
-; CORTEX-R5-FAST: .eabi_attribute 23, 1
; CORTEX-R7: .cpu cortex-r7
; CORTEX-R7: .eabi_attribute 6, 10
@@ -1301,12 +1041,6 @@
; CORTEX-R7-NOT: .eabi_attribute 28
; CORTEX-R7: .eabi_attribute 38, 1
-; CORTEX-R7-FAST-NOT: .eabi_attribute 19
-;; The R7 has the VFPv3 FP unit, which always flushes preserving sign.
-; CORTEX-R7-FAST: .eabi_attribute 20, 2
-; CORTEX-R7-FAST-NOT: .eabi_attribute 21
-; CORTEX-R7-FAST-NOT: .eabi_attribute 22
-; CORTEX-R7-FAST: .eabi_attribute 23, 1
; CORTEX-R8: .cpu cortex-r8
; CORTEX-R8: .eabi_attribute 6, 10
@@ -1329,12 +1063,6 @@
; CORTEX-R8-NOT: .eabi_attribute 28
; CORTEX-R8: .eabi_attribute 38, 1
-; CORTEX-R8-FAST-NOT: .eabi_attribute 19
-;; The R8 has the VFPv3 FP unit, which always flushes preserving sign.
-; CORTEX-R8-FAST: .eabi_attribute 20, 2
-; CORTEX-R8-FAST-NOT: .eabi_attribute 21
-; CORTEX-R8-FAST-NOT: .eabi_attribute 22
-; CORTEX-R8-FAST: .eabi_attribute 23, 1
; CORTEX-A32: .cpu cortex-a32
; CORTEX-A32: .eabi_attribute 6, 14
@@ -1359,12 +1087,6 @@
; CORTEX-A32-NOT: .eabi_attribute 28
; CORTEX-A32: .eabi_attribute 38, 1
-; CORTEX-A32-FAST-NOT: .eabi_attribute 19
-;; The A32 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-A32-FAST: .eabi_attribute 20, 2
-; CORTEX-A32-FAST-NOT: .eabi_attribute 21
-; CORTEX-A32-FAST-NOT: .eabi_attribute 22
-; CORTEX-A32-FAST: .eabi_attribute 23, 1
; CORTEX-M23: .cpu cortex-m23
; CORTEX-M23: .eabi_attribute 6, 16
@@ -1430,11 +1152,6 @@
; CORTEX-M35P: .eabi_attribute 38, 1
; CORTEX-M35P: .eabi_attribute 14, 0
-; CORTEX-M33-FAST-NOT: .eabi_attribute 19
-; CORTEX-M33-FAST: .eabi_attribute 20, 2
-; CORTEX-M33-FAST-NOT: .eabi_attribute 21
-; CORTEX-M33-FAST-NOT: .eabi_attribute 22
-; CORTEX-M33-FAST: .eabi_attribute 23, 1
; CORTEX-A35: .cpu cortex-a35
; CORTEX-A35: .eabi_attribute 6, 14
@@ -1459,12 +1176,6 @@
; CORTEX-A35-NOT: .eabi_attribute 28
; CORTEX-A35: .eabi_attribute 38, 1
-; CORTEX-A35-FAST-NOT: .eabi_attribute 19
-;; The A35 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-A35-FAST: .eabi_attribute 20, 2
-; CORTEX-A35-FAST-NOT: .eabi_attribute 21
-; CORTEX-A35-FAST-NOT: .eabi_attribute 22
-; CORTEX-A35-FAST: .eabi_attribute 23, 1
; CORTEX-A53: .cpu cortex-a53
; CORTEX-A53: .eabi_attribute 6, 14
@@ -1489,12 +1200,6 @@
; CORTEX-A53-NOT: .eabi_attribute 28
; CORTEX-A53: .eabi_attribute 38, 1
-; CORTEX-A53-FAST-NOT: .eabi_attribute 19
-;; The A53 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-A53-FAST: .eabi_attribute 20, 2
-; CORTEX-A53-FAST-NOT: .eabi_attribute 21
-; CORTEX-A53-FAST-NOT: .eabi_attribute 22
-; CORTEX-A53-FAST: .eabi_attribute 23, 1
; CORTEX-A57: .cpu cortex-a57
; CORTEX-A57: .eabi_attribute 6, 14
@@ -1519,12 +1224,6 @@
; CORTEX-A57-NOT: .eabi_attribute 28
; CORTEX-A57: .eabi_attribute 38, 1
-; CORTEX-A57-FAST-NOT: .eabi_attribute 19
-;; The A57 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-A57-FAST: .eabi_attribute 20, 2
-; CORTEX-A57-FAST-NOT: .eabi_attribute 21
-; CORTEX-A57-FAST-NOT: .eabi_attribute 22
-; CORTEX-A57-FAST: .eabi_attribute 23, 1
; CORTEX-A72: .cpu cortex-a72
; CORTEX-A72: .eabi_attribute 6, 14
@@ -1549,12 +1248,6 @@
; CORTEX-A72-NOT: .eabi_attribute 28
; CORTEX-A72: .eabi_attribute 38, 1
-; CORTEX-A72-FAST-NOT: .eabi_attribute 19
-;; The A72 has the ARMv8 FP unit, which always flushes preserving sign.
-; CORTEX-A72-FAST: .eabi_attribute 20, 2
-; CORTEX-A72-FAST-NOT: .eabi_attribute 21
-; CORTEX-A72-FAST-NOT: .eabi_attribute 22
-; CORTEX-A72-FAST: .eabi_attribute 23, 1
; CORTEX-A73: .cpu cortex-a73
; CORTEX-A73: .eabi_attribute 6, 14
@@ -1580,12 +1273,6 @@
; CORTEX-A73: .eabi_attribute 38, 1
; CORTEX-A73: .eabi_attribute 14, 0
-; EXYNOS-FAST-NOT: .eabi_attribute 19
-;; The Exynos processors have the ARMv8 FP unit, which always flushes preserving sign.
-; EXYNOS-FAST: .eabi_attribute 20, 2
-; EXYNOS-FAST-NOT: .eabi_attribute 21
-; EXYNOS-FAST-NOT: .eabi_attribute 22
-; EXYNOS-FAST: .eabi_attribute 23, 1
; EXYNOS-M3: .cpu exynos-m3
; EXYNOS-M3: .eabi_attribute 6, 14
@@ -1684,12 +1371,6 @@
; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28
; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1
-; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19
-;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign.
-; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 20, 2
-; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 21
-; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 22
-; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 23, 1
; RELOC-PIC: .eabi_attribute 15, 1
; RELOC-PIC: .eabi_attribute 16, 1
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll
new file mode 100644
index 000000000000..972a4708994d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll
@@ -0,0 +1,39 @@
+;; Test that a potential indirect call target function that has internal linkage and
+;; whose address is taken has its type ID emitted to the .llvm.callgraph section.
+;; This test also makes sure that callback functions that meet the above constraints
+;; are handled correctly.
+
+; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s
+
+declare !type !0 void @_Z6doWorkPFviE(ptr)
+
+define i32 @_Z4testv() !type !1 {
+entry:
+ call void @_Z6doWorkPFviE(ptr nonnull @_ZL10myCallbacki)
+ ret i32 0
+}
+
+; CHECK: _ZL10myCallbacki:
+; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]:
+define internal void @_ZL10myCallbacki(i32 %value) !type !2 {
+entry:
+ %sink = alloca i32, align 4
+ store volatile i32 %value, ptr %sink, align 4
+ %i1 = load volatile i32, ptr %sink, align 4
+ ret void
+}
+
+!0 = !{i64 0, !"_ZTSFvPFviEE.generalized"}
+!1 = !{i64 0, !"_ZTSFivE.generalized"}
+!2 = !{i64 0, !"_ZTSFviE.generalized"}
+
+; CHECK: .section .llvm.callgraph,"o",%progbits,.text
+;; Version
+; CHECK-NEXT: .byte 0
+;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0.
+; CHECK-NEXT: .byte 1
+;; Function Entry PC
+; CHECK-NEXT: .long [[LABEL_FUNC]]
+;; Function type ID -5212364466660467813
+; CHECK-NEXT: .long 1154849691
+; CHECK-NEXT: .long 3081369122
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll
new file mode 100644
index 000000000000..ec8d5b8ad94a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll
@@ -0,0 +1,63 @@
+;; Test if temporary labels are generated for each indirect callsite.
+;; Test if the MD5 hash of each callee's type (type id) stored in the
+;; .llvm.callgraph section is correctly paired with the temporary label generated
+;; for the corresponding indirect call site annotated with !callee_type metadata.
+;; Test if the .llvm.callgraph section contains unique direct callees.
+
+; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s
+
+declare !type !0 void @direct_foo()
+declare !type !1 i32 @direct_bar(i8)
+declare !type !2 ptr @direct_baz(ptr)
+
+; CHECK: ball:
+; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]:
+define ptr @ball() {
+entry:
+ call void @direct_foo()
+ %fp_foo_val = load ptr, ptr null, align 8
+ call void (...) %fp_foo_val(), !callee_type !0
+ call void @direct_foo()
+ %fp_bar_val = load ptr, ptr null, align 8
+ %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2
+ %call_fp_bar_direct = call i32 @direct_bar(i8 1)
+ %fp_baz_val = load ptr, ptr null, align 8
+ %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
+ call void @direct_foo()
+ %call_fp_baz_direct = call ptr @direct_baz(ptr null)
+ call void @direct_foo()
+ ret ptr %call_fp_baz
+}
+
+!0 = !{!1}
+!1 = !{i64 0, !"_ZTSFvE.generalized"}
+!2 = !{!3}
+!3 = !{i64 0, !"_ZTSFicE.generalized"}
+!4 = !{!5}
+!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+
+; CHECK: .section .llvm.callgraph,"o",%progbits,.text
+;; Version
+; CHECK-NEXT: .byte 0
+;; Flags
+; CHECK-NEXT: .byte 7
+;; Function Entry PC
+; CHECK-NEXT: .long [[LABEL_FUNC]]
+;; Function type ID -- set to 0 as no type metadata attached to function.
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+;; Number of unique direct callees.
+; CHECK-NEXT: .byte 3
+;; Direct callees.
+; CHECK-NEXT: .long direct_foo
+; CHECK-NEXT: .long direct_bar
+; CHECK-NEXT: .long direct_baz
+;; Number of unique indirect target type IDs.
+; CHECK-NEXT: .byte 3
+;; Indirect type IDs.
+; CHECK-NEXT: .long 838288420
+; CHECK-NEXT: .long 1053552373
+; CHECK-NEXT: .long 1505527380
+; CHECK-NEXT: .long 814631809
+; CHECK-NEXT: .long 342417018
+; CHECK-NEXT: .long 2013108216
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll
new file mode 100644
index 000000000000..80360041c106
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll
@@ -0,0 +1,34 @@
+;; Tests that we store the type identifiers in the .llvm.callgraph section of the object file for tail calls.
+
+; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
+
+define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
+entry:
+ %call = tail call i32 %func(i8 signext %x), !callee_type !1
+ ret i32 %call
+}
+
+define i32 @main(i32 %argc) !type !3 {
+entry:
+ %andop = and i32 %argc, 1
+ %cmp = icmp eq i32 %andop, 0
+ %foo.bar = select i1 %cmp, ptr @foo, ptr @bar
+ %call.i = tail call i32 %foo.bar(i8 signext 97), !callee_type !1
+ ret i32 %call.i
+}
+
+declare !type !2 i32 @foo(i8 signext)
+
+declare !type !2 i32 @bar(i8 signext)
+
+!0 = !{i64 0, !"_ZTSFiPvcE.generalized"}
+!1 = !{!2}
+!2 = !{i64 0, !"_ZTSFicE.generalized"}
+!3 = !{i64 0, !"_ZTSFiiE.generalized"}
+
+; CHECK: Hex dump of section '.llvm.callgraph':
+; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154
+; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8
+;; Verify that the type id 0x308e4b8159bc8654 (stored little-endian as 5486bc59 814b8e30) is in the section.
+; CHECK-NEXT: 0x00000020 3e0cfe3c b2015486 bc59814b 8e30
diff --git a/llvm/test/CodeGen/ARM/call-graph-section.ll b/llvm/test/CodeGen/ARM/call-graph-section.ll
new file mode 100644
index 000000000000..167cc6f3c73b
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/call-graph-section.ll
@@ -0,0 +1,37 @@
+;; Tests that we store the type identifiers in the .llvm.callgraph section of the object file.
+
+; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
+
+declare !type !0 void @foo()
+
+declare !type !1 i32 @bar(i8)
+
+declare !type !2 ptr @baz(ptr)
+
+define void @main() {
+entry:
+ %fp_foo_val = load ptr, ptr null, align 8
+ call void (...) %fp_foo_val(), !callee_type !1
+ %fp_bar_val = load ptr, ptr null, align 8
+ %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !3
+ %fp_baz_val = load ptr, ptr null, align 8
+ %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4
+ ret void
+}
+
+;; Check that the numeric type IDs (md5 hashes) of the type ID strings below are
+;; emitted to the callgraph section.
+!0 = !{i64 0, !"_ZTSFvE.generalized"}
+!1 = !{!0}
+!2 = !{i64 0, !"_ZTSFicE.generalized"}
+!3 = !{!2}
+!4 = !{!5}
+!5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+
+;; Make sure the following type IDs (stored little-endian) are in the call graph section:
+;; 0x3ecbeef531f74424, 0x308e4b8159bc8654, 0x77fd97f81468de7a
+; CHECK: Hex dump of section '.llvm.callgraph':
+; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000324
+; CHECK-NEXT: 0x00000010 44f731f5 eecb3e54 86bc5981 4b8e307a
+; CHECK-NEXT: 0x00000020 de6814f8 97fd77
diff --git a/llvm/test/CodeGen/ARM/carry.ll b/llvm/test/CodeGen/ARM/carry.ll
index 558e2b0e43f7..a652241dac5b 100644
--- a/llvm/test/CodeGen/ARM/carry.ll
+++ b/llvm/test/CodeGen/ARM/carry.ll
@@ -1,61 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
; CHECK-LABEL: f1:
-; CHECK: subs r
-; CHECK: sbc r
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbc r1, r1, r3
+; CHECK-NEXT: bx lr
entry:
- %tmp = sub i64 %a, %b
- ret i64 %tmp
+ %tmp = sub i64 %a, %b
+ ret i64 %tmp
}
define i64 @f2(i64 %a, i64 %b) {
; CHECK-LABEL: f2:
-; CHECK: lsl r
-; CHECK: orr r
-; CHECK: rsbs r
-; CHECK: sbc r
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsl r1, r1, #1
+; CHECK-NEXT: orr r1, r1, r0, lsr #31
+; CHECK-NEXT: rsbs r0, r2, r0, lsl #1
+; CHECK-NEXT: sbc r1, r1, r3
+; CHECK-NEXT: bx lr
entry:
- %tmp1 = shl i64 %a, 1
- %tmp2 = sub i64 %tmp1, %b
- ret i64 %tmp2
+ %tmp1 = shl i64 %a, 1
+ %tmp2 = sub i64 %tmp1, %b
+ ret i64 %tmp2
}
; add with live carry
define i64 @f3(i32 %al, i32 %bl) {
; CHECK-LABEL: f3:
-; CHECK: adds r
-; CHECK: adc r
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: adcs r0, r1, #0
+; CHECK-NEXT: adc r1, r2, #0
+; CHECK-NEXT: bx lr
entry:
- ; unsigned wide add
- %aw = zext i32 %al to i64
- %bw = zext i32 %bl to i64
- %cw = add i64 %aw, %bw
- ; ch == carry bit
- %ch = lshr i64 %cw, 32
- %dw = add i64 %ch, %bw
- ret i64 %dw
+ ; unsigned wide add
+ %aw = zext i32 %al to i64
+ %bw = zext i32 %bl to i64
+ %cw = add i64 %aw, %bw
+ ; ch == carry bit
+ %ch = lshr i64 %cw, 32
+ %dw = add i64 %ch, %bw
+ ret i64 %dw
}
; rdar://10073745
define i64 @f4(i64 %x) nounwind readnone {
-entry:
; CHECK-LABEL: f4:
-; CHECK: rsbs r
-; CHECK: rsc r
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: rsc r1, r1, #0
+; CHECK-NEXT: bx lr
+entry:
%0 = sub nsw i64 0, %x
ret i64 %0
}
; rdar://12559385
define i64 @f5(i32 %vi) {
-entry:
; CHECK-LABEL: f5:
-; CHECK: movw [[REG:r[0-9]+]], #36102
-; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
- %v0 = zext i32 %vi to i64
- %v1 = xor i64 %v0, -155057456198619
- %v4 = add i64 %v1, 155057456198619
- %v5 = add i64 %v4, %v1
- ret i64 %v5
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movw r1, #19493
+; CHECK-NEXT: movw r2, #29433
+; CHECK-NEXT: movt r1, #57191
+; CHECK-NEXT: eor r0, r0, r1
+; CHECK-NEXT: movw r3, #46043
+; CHECK-NEXT: movt r2, #65535
+; CHECK-NEXT: adds r0, r0, r0
+; CHECK-NEXT: movw r1, #36102
+; CHECK-NEXT: sbc r2, r2, r1
+; CHECK-NEXT: movt r3, #8344
+; CHECK-NEXT: adds r0, r0, r3
+; CHECK-NEXT: adc r1, r2, r1
+; CHECK-NEXT: bx lr
+entry:
+ %v0 = zext i32 %vi to i64
+ %v1 = xor i64 %v0, -155057456198619
+ %v4 = add i64 %v1, 155057456198619
+ %v5 = add i64 %v4, %v1
+ ret i64 %v5
}
diff --git a/llvm/test/CodeGen/ARM/combine-movc-sub.ll b/llvm/test/CodeGen/ARM/combine-movc-sub.ll
index ca5d08944354..8ca4c4320987 100644
--- a/llvm/test/CodeGen/ARM/combine-movc-sub.ll
+++ b/llvm/test/CodeGen/ARM/combine-movc-sub.ll
@@ -27,11 +27,11 @@ define hidden fastcc ptr @test(ptr %Search, ptr %ClauseList, i32 %Level, ptr noc
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: sub.w r7, r2, #32
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: sub.w r8, r2, #32
+; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: movs r0, #1
; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: add.w r6, r0, r7, lsr #5
+; CHECK-NEXT: add.w r7, r0, r8, lsr #5
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: b .LBB0_2
@@ -44,16 +44,16 @@ define hidden fastcc ptr @test(ptr %Search, ptr %ClauseList, i32 %Level, ptr noc
; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: cmp r4, #31
; CHECK-NEXT: ldr r0, [r1, #16]
-; CHECK-NEXT: add.w r0, r0, r6, lsl #2
+; CHECK-NEXT: add.w r0, r0, r7, lsl #2
; CHECK-NEXT: ldr r0, [r0, #40]
; CHECK-NEXT: it hi
-; CHECK-NEXT: andhi r2, r7, #31
+; CHECK-NEXT: andhi r2, r8, #31
; CHECK-NEXT: lsrs r0, r2
; CHECK-NEXT: lsls r0, r0, #31
; CHECK-NEXT: beq .LBB0_1
; CHECK-NEXT: @ %bb.3: @ %if.then
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl foo
; CHECK-NEXT: str.w r9, [r5, #4]
; CHECK-NEXT: b .LBB0_1
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
index 1bee32f4c90c..fe23e8594c94 100644
--- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -22,15 +22,16 @@ body: |
; CHECK-LABEL: name: test_groups
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
- ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
- ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+ renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/ARM/extract-bits.ll b/llvm/test/CodeGen/ARM/extract-bits.ll
new file mode 100644
index 000000000000..d717806098fb
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/extract-bits.ll
@@ -0,0 +1,4591 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
+; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
+; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
+; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
+
+; Patterns:
+; a) (x >> start) & ((1 << nbits) - 1)
+; b) (x >> start) & ~(-1 << nbits)
+; c) (x >> start) & (-1 >> (32 - nbits))
+; d) (x >> start) << (32 - nbits) >> (32 - nbits)
+; are equivalent.
+
+; ---------------------------------------------------------------------------- ;
+; Pattern a. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a0:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a0:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, lsr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a0:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: subs r1, r1, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a0_arithmetic:
+; V7M: @ %bb.0:
+; V7M-NEXT: asrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a0_arithmetic:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, asr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a0_arithmetic:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: asrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a0_arithmetic:
+; V6M: @ %bb.0:
+; V6M-NEXT: asrs r0, r1
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: subs r1, r1, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %shifted = ashr i32 %val, %numskipbits
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, lsr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: subs r1, r1, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %skip = zext i8 %numskipbits to i32
+ %shifted = lshr i32 %val, %skip
+ %conv = zext i8 %numlowbits to i32
+ %onebit = shl i32 1, %conv
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, lsr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: lsrs r3, r1
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: ands r0, r3
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %shifted = lshr i32 %val, %numskipbits
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, lsr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: lsrs r3, r1
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: ands r0, r3
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %skip = zext i8 %numskipbits to i32
+ %shifted = lshr i32 %val, %skip
+ %conv = zext i8 %numlowbits to i32
+ %onebit = shl i32 1, %conv
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_a4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: movs r1, #1
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_a4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r12, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r2, r3, r12, lsl r2
+; V7A-NEXT: and r0, r2, r0, lsr r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_a4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsls r1, r2
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_a4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: subs r1, r1, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %shifted, %mask ; swapped order
+ ret i32 %masked
+}
+
+; 64-bit
+
+define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_a0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: mov.w lr, #1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: rsb.w r4, r12, #32
+; V7M-NEXT: subs.w r3, r12, #32
+; V7M-NEXT: lsr.w r4, lr, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r4, lr, r3
+; V7M-NEXT: lsl.w r3, lr, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: subs r3, #1
+; V7M-NEXT: sbc r12, r4, #0
+; V7M-NEXT: rsb.w r4, r2, #32
+; V7M-NEXT: lsl.w r4, r1, r4
+; V7M-NEXT: orrs r0, r4
+; V7M-NEXT: subs.w r4, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r4
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: and.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: and.w r1, r1, r12
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_a0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr lr, [sp, #16]
+; V7A-NEXT: mov r5, #1
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r12, lr, #32
+; V7A-NEXT: subs r4, lr, #32
+; V7A-NEXT: lsr r3, r5, r12
+; V7A-NEXT: lslpl r3, r5, r4
+; V7A-NEXT: lsl r5, r5, lr
+; V7A-NEXT: movwpl r5, #0
+; V7A-NEXT: rsb r4, r2, #32
+; V7A-NEXT: subs r5, r5, #1
+; V7A-NEXT: sbc r3, r3, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r4
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r4
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: and r0, r5, r0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_a0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: mov.w lr, #1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: rsb.w r4, r12, #32
+; V7A-T-NEXT: subs.w r3, r12, #32
+; V7A-T-NEXT: lsr.w r4, lr, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, lr, r3
+; V7A-T-NEXT: lsl.w r3, lr, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: subs r3, #1
+; V7A-T-NEXT: sbc r12, r4, #0
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: lsl.w r4, r1, r4
+; V7A-T-NEXT: orrs r0, r4
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: and.w r1, r1, r12
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #12
+; V6M-NEXT: sub sp, #12
+; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: ldr r2, [sp, #32]
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: subs r5, r0, #1
+; V6M-NEXT: sbcs r4, r7
+; V6M-NEXT: mov r0, r6
+; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: add sp, #12
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_a0_arithmetic:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: mov.w lr, #1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: rsb.w r4, r12, #32
+; V7M-NEXT: subs.w r3, r12, #32
+; V7M-NEXT: lsr.w r4, lr, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r4, lr, r3
+; V7M-NEXT: lsl.w r3, lr, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: subs r3, #1
+; V7M-NEXT: sbc r12, r4, #0
+; V7M-NEXT: rsb.w r4, r2, #32
+; V7M-NEXT: lsl.w r4, r1, r4
+; V7M-NEXT: orrs r0, r4
+; V7M-NEXT: subs.w r4, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: asrpl.w r0, r1, r4
+; V7M-NEXT: asr.w r2, r1, r2
+; V7M-NEXT: and.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: asrpl r2, r1, #31
+; V7M-NEXT: and.w r1, r12, r2
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_a0_arithmetic:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr lr, [sp, #16]
+; V7A-NEXT: mov r5, #1
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r12, lr, #32
+; V7A-NEXT: subs r4, lr, #32
+; V7A-NEXT: lsr r3, r5, r12
+; V7A-NEXT: lslpl r3, r5, r4
+; V7A-NEXT: lsl r5, r5, lr
+; V7A-NEXT: movwpl r5, #0
+; V7A-NEXT: rsb r4, r2, #32
+; V7A-NEXT: subs r5, r5, #1
+; V7A-NEXT: sbc r3, r3, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r4
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: asr r2, r1, r2
+; V7A-NEXT: asrpl r2, r1, #31
+; V7A-NEXT: asrpl r0, r1, r4
+; V7A-NEXT: and r1, r3, r2
+; V7A-NEXT: and r0, r5, r0
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_a0_arithmetic:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: mov.w lr, #1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: rsb.w r4, r12, #32
+; V7A-T-NEXT: subs.w r3, r12, #32
+; V7A-T-NEXT: lsr.w r4, lr, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, lr, r3
+; V7A-T-NEXT: lsl.w r3, lr, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: subs r3, #1
+; V7A-T-NEXT: sbc r12, r4, #0
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: lsl.w r4, r1, r4
+; V7A-T-NEXT: orrs r0, r4
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: asrpl.w r0, r1, r4
+; V7A-T-NEXT: asr.w r2, r1, r2
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: asrpl r2, r1, #31
+; V7A-T-NEXT: and.w r1, r12, r2
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a0_arithmetic:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #12
+; V6M-NEXT: sub sp, #12
+; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: ldr r2, [sp, #32]
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: subs r5, r0, #1
+; V6M-NEXT: sbcs r4, r7
+; V6M-NEXT: mov r0, r6
+; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_lasr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: add sp, #12
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %shifted = ashr i64 %val, %numskipbits
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bextr64_a1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: rsb.w r4, r3, #32
+; V7M-NEXT: mov.w lr, #1
+; V7M-NEXT: subs.w r12, r3, #32
+; V7M-NEXT: lsl.w r3, lr, r3
+; V7M-NEXT: lsr.w r4, lr, r4
+; V7M-NEXT: lsr.w r0, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r4, lr, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: subs r3, #1
+; V7M-NEXT: sbc r12, r4, #0
+; V7M-NEXT: rsb.w r4, r2, #32
+; V7M-NEXT: lsl.w r4, r1, r4
+; V7M-NEXT: orrs r0, r4
+; V7M-NEXT: subs.w r4, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r4
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: and.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: and.w r1, r1, r12
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_a1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, lr}
+; V7A-NEXT: push {r4, lr}
+; V7A-NEXT: rsb r12, r3, #32
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: subs r4, r3, #32
+; V7A-NEXT: lsl r3, lr, r3
+; V7A-NEXT: lsr r12, lr, r12
+; V7A-NEXT: movwpl r3, #0
+; V7A-NEXT: lslpl r12, lr, r4
+; V7A-NEXT: rsb r4, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: subs r3, r3, #1
+; V7A-NEXT: sbc r12, r12, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r4
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r4
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: and r1, r12, r1
+; V7A-NEXT: pop {r4, pc}
+;
+; V7A-T-LABEL: bextr64_a1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r4, r3, #32
+; V7A-T-NEXT: mov.w lr, #1
+; V7A-T-NEXT: subs.w r12, r3, #32
+; V7A-T-NEXT: lsl.w r3, lr, r3
+; V7A-T-NEXT: lsr.w r4, lr, r4
+; V7A-T-NEXT: lsr.w r0, r0, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, lr, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: subs r3, #1
+; V7A-T-NEXT: sbc r12, r4, #0
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: lsl.w r4, r1, r4
+; V7A-T-NEXT: orrs r0, r4
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: and.w r1, r1, r12
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #12
+; V6M-NEXT: sub sp, #12
+; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: mov r2, r3
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: subs r5, r0, #1
+; V6M-NEXT: sbcs r4, r7
+; V6M-NEXT: mov r0, r6
+; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: add sp, #12
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %skip = zext i8 %numskipbits to i64
+ %shifted = lshr i64 %val, %skip
+ %conv = zext i8 %numlowbits to i64
+ %onebit = shl i64 1, %conv
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_a2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: mov.w lr, #1
+; V7M-NEXT: rsb.w r1, r12, #32
+; V7M-NEXT: subs.w r3, r12, #32
+; V7M-NEXT: lsr.w r1, lr, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, lr, r3
+; V7M-NEXT: lsl.w r3, lr, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: subs.w lr, r3, #1
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: sbc r12, r1, #0
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsl.w r1, r3, r1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: orrs r0, r1
+; V7M-NEXT: subs.w r1, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r3, r1
+; V7M-NEXT: lsr.w r1, r3, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: and.w r0, r0, lr
+; V7M-NEXT: and.w r1, r1, r12
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_a2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r6, lr}
+; V7A-NEXT: push {r4, r5, r6, lr}
+; V7A-NEXT: ldr r1, [sp, #16]
+; V7A-NEXT: mov r3, #1
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: ldr r5, [r0, #4]
+; V7A-NEXT: rsb r0, r1, #32
+; V7A-NEXT: subs r4, r1, #32
+; V7A-NEXT: lsl r1, r3, r1
+; V7A-NEXT: lsr r0, r3, r0
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: lslpl r0, r3, r4
+; V7A-NEXT: subs r1, r1, #1
+; V7A-NEXT: sbc r3, r0, #0
+; V7A-NEXT: lsr r0, r6, r2
+; V7A-NEXT: rsb r6, r2, #32
+; V7A-NEXT: orr r0, r0, r5, lsl r6
+; V7A-NEXT: subs r6, r2, #32
+; V7A-NEXT: lsrpl r0, r5, r6
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: lsr r1, r5, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: pop {r4, r5, r6, pc}
+;
+; V7A-T-LABEL: bextr64_a2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: movs r3, #1
+; V7A-T-NEXT: ldrd lr, r1, [r0]
+; V7A-T-NEXT: rsb.w r4, r12, #32
+; V7A-T-NEXT: subs.w r0, r12, #32
+; V7A-T-NEXT: lsr.w r4, r3, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, r3, r0
+; V7A-T-NEXT: lsl.w r0, r3, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsr.w r3, lr, r2
+; V7A-T-NEXT: subs r0, #1
+; V7A-T-NEXT: sbc r12, r4, #0
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: lsl.w r4, r1, r4
+; V7A-T-NEXT: orrs r3, r4
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r3, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: and.w r1, r1, r12
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #4
+; V6M-NEXT: sub sp, #4
+; V6M-NEXT: str r2, [sp] @ 4-byte Spill
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: ldr r2, [sp, #24]
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r6, r1
+; V6M-NEXT: subs r4, r0, #1
+; V6M-NEXT: sbcs r6, r7
+; V6M-NEXT: ldm r5!, {r0, r1}
+; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: ands r1, r6
+; V6M-NEXT: add sp, #4
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %val = load i64, ptr %w
+ %shifted = lshr i64 %val, %numskipbits
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; (load i64 %w >> numskipbits) & ((1 << numlowbits) - 1), counts passed as zero-extended i8
+; V7M-LABEL: bextr64_a3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: mov.w r12, #1
+; V7M-NEXT: subs.w lr, r2, #32
+; V7M-NEXT: lsl.w r2, r12, r2
+; V7M-NEXT: lsr.w r3, r12, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r12, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs.w lr, r2, #1
+; V7M-NEXT: ldrd r0, r2, [r0]
+; V7M-NEXT: sbc r12, r3, #0
+; V7M-NEXT: rsb.w r3, r1, #32
+; V7M-NEXT: lsl.w r3, r2, r3
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r1, #32
+; V7M-NEXT: lsr.w r1, r2, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r2, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: and.w r0, r0, lr
+; V7M-NEXT: and.w r1, r1, r12
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_a3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r6, lr}
+; V7A-NEXT: push {r4, r5, r6, lr}
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: mov r3, #1
+; V7A-NEXT: ldr r5, [r0, #4]
+; V7A-NEXT: rsb r0, r2, #32
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lsr r0, r3, r0
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: lslpl r0, r3, r4
+; V7A-NEXT: subs r3, r2, #1
+; V7A-NEXT: sbc r0, r0, #0
+; V7A-NEXT: lsr r2, r5, r1
+; V7A-NEXT: subs r4, r1, #32
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r2, r0, r2
+; V7A-NEXT: lsr r0, r6, r1
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: orr r0, r0, r5, lsl r1
+; V7A-NEXT: mov r1, r2
+; V7A-NEXT: lsrpl r0, r5, r4
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: pop {r4, r5, r6, pc}
+;
+; V7A-T-LABEL: bextr64_a3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: mov.w lr, #1
+; V7A-T-NEXT: subs.w r3, r2, #32
+; V7A-T-NEXT: lsl.w r2, lr, r2
+; V7A-T-NEXT: lsr.w r4, lr, r4
+; V7A-T-NEXT: ldrd r12, r0, [r0]
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, lr, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: subs.w lr, r2, #1
+; V7A-T-NEXT: sbc r2, r4, #0
+; V7A-T-NEXT: lsr.w r4, r0, r1
+; V7A-T-NEXT: subs.w r3, r1, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r4, #0
+; V7A-T-NEXT: and.w r2, r2, r4
+; V7A-T-NEXT: rsb.w r4, r1, #32
+; V7A-T-NEXT: lsr.w r1, r12, r1
+; V7A-T-NEXT: lsl.w r4, r0, r4
+; V7A-T-NEXT: orr.w r1, r1, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r1, r0, r3
+; V7A-T-NEXT: and.w r0, lr, r1
+; V7A-T-NEXT: mov r1, r2
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #4
+; V6M-NEXT: sub sp, #4
+; V6M-NEXT: str r1, [sp] @ 4-byte Spill
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: subs r4, r0, #1
+; V6M-NEXT: sbcs r5, r7
+; V6M-NEXT: ldm r6!, {r0, r1}
+; V6M-NEXT: ldr r2, [sp] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: ands r1, r5
+; V6M-NEXT: add sp, #4
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %val = load i64, ptr %w ; 64-bit source value read from memory
+ %skip = zext i8 %numskipbits to i64 ; widen the skip count to the i64 shift width
+ %shifted = lshr i64 %val, %skip ; logical shift right by the skip count
+ %conv = zext i8 %numlowbits to i64 ; widen the field length to the i64 shift width
+ %onebit = shl i64 1, %conv ; 1 << numlowbits
+ %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
+ %masked = and i64 %mask, %shifted ; keep only the bits selected by the mask
+ ret i64 %masked
+}
+
+define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; (val >> numskipbits) & ((1 << numlowbits) - 1), with the shifted value as the first 'and' operand
+; V7M-LABEL: bextr64_a4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: mov.w lr, #1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: rsb.w r4, r12, #32
+; V7M-NEXT: subs.w r3, r12, #32
+; V7M-NEXT: lsr.w r4, lr, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r4, lr, r3
+; V7M-NEXT: lsl.w r3, lr, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: subs r3, #1
+; V7M-NEXT: sbc r12, r4, #0
+; V7M-NEXT: rsb.w r4, r2, #32
+; V7M-NEXT: lsl.w r4, r1, r4
+; V7M-NEXT: orrs r0, r4
+; V7M-NEXT: subs.w r4, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r4
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: and.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: and.w r1, r1, r12
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_a4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr lr, [sp, #16]
+; V7A-NEXT: mov r5, #1
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r12, lr, #32
+; V7A-NEXT: subs r4, lr, #32
+; V7A-NEXT: lsr r3, r5, r12
+; V7A-NEXT: lslpl r3, r5, r4
+; V7A-NEXT: lsl r5, r5, lr
+; V7A-NEXT: movwpl r5, #0
+; V7A-NEXT: rsb r4, r2, #32
+; V7A-NEXT: subs r5, r5, #1
+; V7A-NEXT: sbc r3, r3, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r4
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r4
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: and r0, r0, r5
+; V7A-NEXT: and r1, r1, r3
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_a4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: mov.w lr, #1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: rsb.w r4, r12, #32
+; V7A-T-NEXT: subs.w r3, r12, #32
+; V7A-T-NEXT: lsr.w r4, lr, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r4, lr, r3
+; V7A-T-NEXT: lsl.w r3, lr, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: subs r3, #1
+; V7A-T-NEXT: sbc r12, r4, #0
+; V7A-T-NEXT: rsb.w r4, r2, #32
+; V7A-T-NEXT: lsl.w r4, r1, r4
+; V7A-T-NEXT: orrs r0, r4
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: and.w r1, r1, r12
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_a4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, r7, lr}
+; V6M-NEXT: push {r4, r5, r6, r7, lr}
+; V6M-NEXT: .pad #12
+; V6M-NEXT: sub sp, #12
+; V6M-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; V6M-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r7, #0
+; V6M-NEXT: ldr r2, [sp, #32]
+; V6M-NEXT: mov r1, r7
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: subs r5, r0, #1
+; V6M-NEXT: sbcs r4, r7
+; V6M-NEXT: mov r0, r6
+; V6M-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; V6M-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: add sp, #12
+; V6M-NEXT: pop {r4, r5, r6, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; logical shift right by the skip count
+ %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
+ %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
+ %masked = and i64 %shifted, %mask ; swapped order: shifted value first, mask second
+ ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; trunc to i32 of (val >> numskipbits) & ((1 << numlowbits) - 1), all computed in i64
+; V7M-LABEL: bextr64_32_a0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsls r2, r1
+; V7M-NEXT: subs r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r1, r2, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_a0:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: mov r1, #1
+; V7A-NEXT: lsl r1, r1, r12
+; V7A-NEXT: subs r2, r12, #32
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: sub r1, r1, #1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_a0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsl.w r1, r1, r12
+; V7A-T-NEXT: subs.w r2, r12, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_a0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: ldr r2, [sp, #8]
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, pc}
+ %shifted = lshr i64 %val, %numskipbits ; logical shift right by the skip count
+ %onebit = shl i64 1, %numlowbits ; 1 << numlowbits, in i64
+ %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
+ %masked = and i64 %mask, %shifted ; 64-bit masking
+ %res = trunc i64 %masked to i32 ; only the low 32 bits are returned
+ ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; trunc(val >> numskipbits) & ((1 << numlowbits) - 1), mask built and applied in i32
+; V7M-LABEL: bextr64_32_a1:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_a1:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: add r12, r3, lr, lsl r12
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: and r0, r12, r0
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_32_a1:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsl.w r1, r1, r12
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_a1:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r1, [sp, #8]
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: pop {r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; 64-bit logical shift right by the skip count
+ %truncshifted = trunc i64 %shifted to i32 ; narrow before masking
+ %onebit = shl i32 1, %numlowbits ; 1 << numlowbits, in i32
+ %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
+ %masked = and i32 %mask, %truncshifted ; 32-bit masking
+ ret i32 %masked
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; trunc((val >> numskipbits) & zext((1 << numlowbits) - 1)), mask built in i32 and applied in i64
+; V7M-LABEL: bextr64_32_a2:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_a2:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: add r12, r3, lr, lsl r12
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: and r0, r12, r0
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_32_a2:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: lsl.w r1, r1, r12
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_a2:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r1, [sp, #8]
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: pop {r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; 64-bit logical shift right by the skip count
+ %onebit = shl i32 1, %numlowbits ; 1 << numlowbits, in i32
+ %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
+ %zextmask = zext i32 %mask to i64 ; widen the mask; its upper 32 bits are zero
+ %masked = and i64 %zextmask, %shifted ; 64-bit masking
+ %truncmasked = trunc i64 %masked to i32 ; only the low 32 bits are returned
+ ret i32 %truncmasked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern b. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits)
+; V7M-LABEL: bextr32_b0:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_b0:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_b0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_b0:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: mvns r1, r1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i32 %mask, %shifted ; keep only the bits selected by the mask
+ ret i32 %masked
+}
+
+define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits), counts passed as zero-extended i8
+; V7M-LABEL: bextr32_b1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_b1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_b1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_b1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: mvns r1, r1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: bx lr
+ %skip = zext i8 %numskipbits to i32 ; widen the skip count to the i32 shift width
+ %shifted = lshr i32 %val, %skip ; logical shift right by the skip count
+ %conv = zext i8 %numlowbits to i32 ; widen the field length to the i32 shift width
+ %notmask = shl i32 -1, %conv ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i32 %mask, %shifted ; keep only the bits selected by the mask
+ ret i32 %masked
+}
+
+define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; (load i32 %w >> numskipbits) & ~(-1 << numlowbits)
+; V7M-LABEL: bextr32_b2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_b2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_b2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_b2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #0
+; V6M-NEXT: mvns r3, r3
+; V6M-NEXT: lsls r3, r2
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bics r0, r3
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w ; 32-bit source value read from memory
+ %shifted = lshr i32 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i32 %mask, %shifted ; keep only the bits selected by the mask
+ ret i32 %masked
+}
+
+define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; (load i32 %w >> numskipbits) & ~(-1 << numlowbits), counts passed as zero-extended i8
+; V7M-LABEL: bextr32_b3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_b3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_b3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_b3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #0
+; V6M-NEXT: mvns r3, r3
+; V6M-NEXT: lsls r3, r2
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bics r0, r3
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w ; 32-bit source value read from memory
+ %skip = zext i8 %numskipbits to i32 ; widen the skip count to the i32 shift width
+ %shifted = lshr i32 %val, %skip ; logical shift right by the skip count
+ %conv = zext i8 %numlowbits to i32 ; widen the field length to the i32 shift width
+ %notmask = shl i32 -1, %conv ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i32 %mask, %shifted ; keep only the bits selected by the mask
+ ret i32 %masked
+}
+
+define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits), with the shifted value as the first 'and' operand
+; V7M-LABEL: bextr32_b4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_b4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_b4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_b4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: mvns r1, r1
+; V6M-NEXT: lsls r1, r2
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i32 %shifted, %mask ; swapped order: shifted value first, mask second
+ ret i32 %masked
+}
+
+; 64-bit
+
+define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits), all in i64
+; V7M-LABEL: bextr64_b0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: lsl.w r3, r2, r12
+; V7M-NEXT: subs.w lr, r12, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r2, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: bics r1, r2
+; V7M-NEXT: bics r0, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_b0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r3
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: subs lr, r12, #32
+; V7A-NEXT: lsl r2, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: lslpl r3, r3, lr
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_b0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, r5, r7, lr}
+; V7A-T-NEXT: push {r4, r5, r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #16]
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r5, r0, r3
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: subs.w lr, r12, #32
+; V7A-T-NEXT: lsl.w r0, r3, r12
+; V7A-T-NEXT: itt pl
+; V7A-T-NEXT: lslpl.w r3, r3, lr
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r5, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: bic.w r0, r5, r0
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: pop {r4, r5, r7, pc}
+;
+; V6M-LABEL: bextr64_b0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: ldr r2, [sp, #16]
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r4, r0
+; V6M-NEXT: bics r5, r1
+; V6M-NEXT: mov r0, r4
+; V6M-NEXT: mov r1, r5
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i64 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i64 %mask, %shifted ; keep only the bits selected by the mask
+ ret i64 %masked
+}
+
+define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits) in i64, counts passed as zero-extended i8
+; V7M-LABEL: bextr64_b1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsr.w r12, r0, r2
+; V7M-NEXT: rsb.w r0, r2, #32
+; V7M-NEXT: lsl.w r0, r1, r0
+; V7M-NEXT: orr.w r12, r12, r0
+; V7M-NEXT: subs.w r0, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r12, r1, r0
+; V7M-NEXT: lsr.w r0, r1, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: subs.w r1, r3, #32
+; V7M-NEXT: lsl.w r3, r2, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r2, r1
+; V7M-NEXT: bic.w r1, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: bic.w r0, r12, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_b1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r12, r0, r2
+; V7A-NEXT: rsb r0, r2, #32
+; V7A-NEXT: orr r12, r12, r1, lsl r0
+; V7A-NEXT: subs r0, r2, #32
+; V7A-NEXT: lsrpl r12, r1, r0
+; V7A-NEXT: lsr r0, r1, r2
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: subs r1, r3, #32
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: lsl r3, r2, r3
+; V7A-NEXT: lslpl r2, r2, r1
+; V7A-NEXT: bic r1, r0, r2
+; V7A-NEXT: movwpl r3, #0
+; V7A-NEXT: bic r0, r12, r3
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_b1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsr.w r12, r0, r2
+; V7A-T-NEXT: rsb.w r0, r2, #32
+; V7A-T-NEXT: lsl.w r0, r1, r0
+; V7A-T-NEXT: orr.w r12, r12, r0
+; V7A-T-NEXT: subs.w r0, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r12, r1, r0
+; V7A-T-NEXT: lsr.w r0, r1, r2
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs.w r1, r3, #32
+; V7A-T-NEXT: lsl.w r3, r2, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r2, r1
+; V7A-T-NEXT: bic.w r1, r0, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: bic.w r0, r12, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_b1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r4, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: mov r6, r1
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r5, r0
+; V6M-NEXT: bics r6, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %skip = zext i8 %numskipbits to i64 ; widen the skip count to the i64 shift width
+ %shifted = lshr i64 %val, %skip ; logical shift right by the skip count
+ %conv = zext i8 %numlowbits to i64 ; widen the field length to the i64 shift width
+ %notmask = shl i64 -1, %conv ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i64 %mask, %shifted ; keep only the bits selected by the mask
+ ret i64 %masked
+}
+
+define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; (load i64 %w >> numskipbits) & ~(-1 << numlowbits)
+; V7M-LABEL: bextr64_b2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsl.w r1, r3, r1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: orrs r0, r1
+; V7M-NEXT: subs.w r1, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r3, r1
+; V7M-NEXT: lsr.w r1, r3, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: lsl.w r3, r2, r12
+; V7M-NEXT: subs.w lr, r12, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r2, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: bics r1, r2
+; V7M-NEXT: bics r0, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_b2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: ldrd r0, r1, [r0]
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r3
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: subs lr, r12, #32
+; V7A-NEXT: lsl r2, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: lslpl r3, r3, lr
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_b2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: ldrd r0, r3, [r0]
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: lsl.w r1, r3, r1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: orrs r0, r1
+; V7A-T-NEXT: subs.w r1, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r3, r1
+; V7A-T-NEXT: lsr.w r1, r3, r2
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: lsl.w r2, r3, r12
+; V7A-T-NEXT: subs.w lr, r12, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r3, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: bics r0, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bextr64_b2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: ldr r2, [sp, #16]
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r4, r0
+; V6M-NEXT: bics r5, r1
+; V6M-NEXT: mov r0, r4
+; V6M-NEXT: mov r1, r5
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %val = load i64, ptr %w ; 64-bit source value read from memory
+ %shifted = lshr i64 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i64 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i64 %mask, %shifted ; keep only the bits selected by the mask
+ ret i64 %masked
+}
+
+define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; (load i64 %w >> numskipbits) & ~(-1 << numlowbits), counts passed as zero-extended i8
+; V7M-LABEL: bextr64_b3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: ldrd r12, r0, [r0]
+; V7M-NEXT: rsb.w r3, r1, #32
+; V7M-NEXT: lsl.w lr, r0, r3
+; V7M-NEXT: lsr.w r3, r12, r1
+; V7M-NEXT: orr.w r12, r3, lr
+; V7M-NEXT: subs.w r3, r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r12, r0, r3
+; V7M-NEXT: lsr.w r0, r0, r1
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: subs.w r1, r2, #32
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r1
+; V7M-NEXT: bic.w r1, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: bic.w r0, r12, r2
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_b3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldm r0, {r0, r3}
+; V7A-NEXT: lsr r12, r0, r1
+; V7A-NEXT: rsb r0, r1, #32
+; V7A-NEXT: orr r12, r12, r3, lsl r0
+; V7A-NEXT: subs r0, r1, #32
+; V7A-NEXT: lsrpl r12, r3, r0
+; V7A-NEXT: lsr r0, r3, r1
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: subs r1, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r1
+; V7A-NEXT: bic r1, r0, r3
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r0, r12, r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_b3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: ldrd r12, r3, [r0]
+; V7A-T-NEXT: rsb.w r0, r1, #32
+; V7A-T-NEXT: lsl.w lr, r3, r0
+; V7A-T-NEXT: lsr.w r0, r12, r1
+; V7A-T-NEXT: orr.w r12, r0, lr
+; V7A-T-NEXT: subs.w r0, r1, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r12, r3, r0
+; V7A-T-NEXT: lsr.w r0, r3, r1
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs.w r1, r2, #32
+; V7A-T-NEXT: lsl.w r2, r3, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r1
+; V7A-T-NEXT: bic.w r1, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: bic.w r0, r12, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bextr64_b3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r4, r2
+; V6M-NEXT: mov r2, r1
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: mov r6, r1
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r5, r0
+; V6M-NEXT: bics r6, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %val = load i64, ptr %w ; 64-bit source value read from memory
+ %skip = zext i8 %numskipbits to i64 ; widen the skip count to the i64 shift width
+ %shifted = lshr i64 %val, %skip ; logical shift right by the skip count
+ %conv = zext i8 %numlowbits to i64 ; widen the field length to the i64 shift width
+ %notmask = shl i64 -1, %conv ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i64 %mask, %shifted ; keep only the bits selected by the mask
+ ret i64 %masked
+}
+
+define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; (val >> numskipbits) & ~(-1 << numlowbits) in i64, with the shifted value as the first 'and' operand
+; V7M-LABEL: bextr64_b4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: lsl.w r3, r2, r12
+; V7M-NEXT: subs.w lr, r12, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r2, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: bics r1, r2
+; V7M-NEXT: bics r0, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_b4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsrpl r0, r1, r3
+; V7A-NEXT: lsr r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: subs lr, r12, #32
+; V7A-NEXT: lsl r2, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: lslpl r3, r3, lr
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_b4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, r5, r7, lr}
+; V7A-T-NEXT: push {r4, r5, r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #16]
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r5, r0, r3
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: subs.w lr, r12, #32
+; V7A-T-NEXT: lsl.w r0, r3, r12
+; V7A-T-NEXT: itt pl
+; V7A-T-NEXT: lslpl.w r3, r3, lr
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs.w r4, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r5, r1, r4
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: bic.w r0, r5, r0
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: pop {r4, r5, r7, pc}
+;
+; V6M-LABEL: bextr64_b4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: ldr r2, [sp, #16]
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r4, r0
+; V6M-NEXT: bics r5, r1
+; V6M-NEXT: mov r0, r4
+; V6M-NEXT: mov r1, r5
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; logical shift right by the skip count
+ %notmask = shl i64 -1, %numlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %masked = and i64 %shifted, %mask ; swapped order: shifted value first, mask second
+ ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; trunc to i32 of (val >> numskipbits) & ~(-1 << zext(numlowbits)), all computed in i64
+; V7M-LABEL: bextr64_32_b0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldrb.w r1, [sp]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsls r2, r1
+; V7M-NEXT: subs r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: bics r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_b0:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldrb r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: lsl r1, r1, r12
+; V7A-NEXT: subs r2, r12, #32
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: bic r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_b0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsr.w r12, r0, r2
+; V7A-T-NEXT: rsb.w r0, r2, #32
+; V7A-T-NEXT: ldrb.w r3, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r0, r1, r0
+; V7A-T-NEXT: orr.w r0, r0, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: mov.w r1, #-1
+; V7A-T-NEXT: lsls r1, r3
+; V7A-T-NEXT: subs.w r2, r3, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_b0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: add r1, sp, #8
+; V6M-NEXT: ldrb r2, [r1]
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r4, r0
+; V6M-NEXT: mov r0, r4
+; V6M-NEXT: pop {r4, pc}
+ %shiftedval = lshr i64 %val, %numskipbits ; 64-bit logical shift right by the skip count
+ %widenumlowbits = zext i8 %numlowbits to i64 ; widen the i8 field length to the i64 shift width
+ %notmask = shl nsw i64 -1, %widenumlowbits ; -1 << numlowbits: the inverse of the wanted mask
+ %mask = xor i64 %notmask, -1 ; bitwise NOT of %notmask
+ %wideres = and i64 %shiftedval, %mask ; 64-bit masking, shifted value as first operand
+ %res = trunc i64 %wideres to i32 ; only the low 32 bits are returned
+ ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+; The logical right shift by %numskipbits is done on i64 and truncated to i32;
+; the mask ~(-1 << zext(%numlowbits)) is then built and applied entirely in i32.
+define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_32_b1:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldrb.w r1, [sp]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_b1:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldrb r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r12
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_b1:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldrb.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: mov.w r1, #-1
+; V7A-T-NEXT: lsl.w r1, r1, r12
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_b1:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: add r1, sp, #8
+; V6M-NEXT: ldrb r1, [r1]
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: pop {r7, pc}
+ %shiftedval = lshr i64 %val, %numskipbits
+ %truncshiftedval = trunc i64 %shiftedval to i32
+ %widenumlowbits = zext i8 %numlowbits to i32
+ %notmask = shl nsw i32 -1, %widenumlowbits
+ %mask = xor i32 %notmask, -1
+ %res = and i32 %truncshiftedval, %mask
+ ret i32 %res
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+; The mask ~(-1 << zext(%numlowbits)) is built in i32 and zero-extended to i64,
+; and-ed with the i64 value (%val >>u %numskipbits), and the result is
+; truncated to i32. The expected code is the same as for bextr64_32_b1.
+define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_32_b2:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldrb.w r1, [sp]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_b2:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldrb r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: bic r0, r0, r1, lsl r12
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_b2:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldrb.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: mov.w r1, #-1
+; V7A-T-NEXT: lsl.w r1, r1, r12
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_b2:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: add r1, sp, #8
+; V6M-NEXT: ldrb r1, [r1]
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: pop {r7, pc}
+ %shiftedval = lshr i64 %val, %numskipbits
+ %widenumlowbits = zext i8 %numlowbits to i32
+ %notmask = shl nsw i32 -1, %widenumlowbits
+ %mask = xor i32 %notmask, -1
+ %zextmask = zext i32 %mask to i64
+ %wideres = and i64 %shiftedval, %zextmask
+ %res = trunc i64 %wideres to i32
+ ret i32 %res
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern c. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+; Pattern c baseline: mask = -1 >>u (32 - %numlowbits), and the result is
+; mask & (%val >>u %numskipbits). For every check prefix the expected code
+; applies a left shift then a right shift by (32 - %numlowbits) to the shifted
+; value instead of materializing the mask.
+define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_c0:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_c0:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_c0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_c0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #32
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: lsrs r0, r2
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+; Variant of pattern c where both bit counts are i8. The skip count is
+; zero-extended to i32; (32 - %numlowbits) is computed in i8 and only then
+; zero-extended, which is why the expected code has a uxtb after the
+; subtraction.
+define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_c1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_c1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_c1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_c1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #32
+; V6M-NEXT: subs r1, r1, r2
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %skip = zext i8 %numskipbits to i32
+ %shifted = lshr i32 %val, %skip
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %mask = lshr i32 -1, %sh_prom
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+; Same computation as bextr32_c0, except that the value to extract from is
+; loaded from the pointer %w instead of being passed directly.
+define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_c2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_c2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_c2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_c2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #32
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: lsrs r0, r2
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %shifted = lshr i32 %val, %numskipbits
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+; Combines the two previous variants: the value is loaded from %w, and both
+; bit counts are i8, with (32 - %numlowbits) computed in i8 before being
+; zero-extended to i32.
+define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_c3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_c3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_c3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_c3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #32
+; V6M-NEXT: subs r1, r1, r2
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %skip = zext i8 %numskipbits to i32
+ %shifted = lshr i32 %val, %skip
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %mask = lshr i32 -1, %sh_prom
+ %masked = and i32 %mask, %shifted
+ ret i32 %masked
+}
+
+; Same computation as bextr32_c0 with the operands of the final `and` swapped
+; (shifted value first, mask second). The expected code is identical to
+; bextr32_c0 for every check prefix.
+define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_c4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_c4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_c4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_c4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #32
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: lsrs r0, r2
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %shifted, %mask ; swapped order
+ ret i32 %masked
+}
+
+; 64-bit
+
+; 64-bit form of pattern c: mask = -1 >>u (64 - %numlowbits), and the result is
+; mask & (%val >>u %numskipbits), all in i64. The V6M expectation performs
+; both 64-bit right shifts through calls to __aeabi_llsr.
+define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_c0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: ldr.w r12, [sp]
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: rsb.w r3, r12, #64
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: lsr.w r3, r2, r3
+; V7M-NEXT: rsbs.w r12, r12, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r2, r2, r12
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_c0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr r12, [sp, #16]
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r5, r1, r2
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r4, r12, #64
+; V7A-NEXT: rsbs lr, r12, #32
+; V7A-NEXT: lsr r4, r3, r4
+; V7A-NEXT: lsrpl r3, r3, lr
+; V7A-NEXT: movwpl r4, #0
+; V7A-NEXT: subs lr, r2, #32
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: movwpl r5, #0
+; V7A-NEXT: and r12, r4, r5
+; V7A-NEXT: orr r0, r0, r1, lsl r2
+; V7A-NEXT: lsrpl r0, r1, lr
+; V7A-NEXT: mov r1, r12
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_c0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: mov.w lr, #-1
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orrs r0, r3
+; V7A-T-NEXT: subs.w r3, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r3
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: rsbs.w r2, r12, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r3, r2
+; V7A-T-NEXT: rsb.w r2, r12, #64
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: lsr.w r2, lr, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: ands r1, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bextr64_c0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: ldr r0, [sp, #16]
+; V6M-NEXT: movs r1, #64
+; V6M-NEXT: subs r2, r1, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+; 64-bit pattern c with i8 bit counts. The skip count is zero-extended to i64;
+; (64 - %numlowbits) is computed in i8 and then zero-extended to i64 before
+; being used as the mask shift amount.
+define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_c1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: uxtb r2, r2
+; V7M-NEXT: lsr.w r12, r0, r2
+; V7M-NEXT: rsb.w r0, r2, #32
+; V7M-NEXT: lsl.w r0, r1, r0
+; V7M-NEXT: orr.w r12, r12, r0
+; V7M-NEXT: subs.w r0, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r12, r1, r0
+; V7M-NEXT: rsb.w r0, r3, #64
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: uxtb r0, r0
+; V7M-NEXT: subs.w lr, r0, #32
+; V7M-NEXT: lsr.w r2, r3, r0
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r3, r3, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: and.w r0, r3, r12
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_c1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, lr}
+; V7A-NEXT: push {r4, lr}
+; V7A-NEXT: uxtb r12, r2
+; V7A-NEXT: lsr lr, r0, r12
+; V7A-NEXT: rsb r0, r12, #32
+; V7A-NEXT: orr r4, lr, r1, lsl r0
+; V7A-NEXT: mvn lr, #31
+; V7A-NEXT: uxtab r2, lr, r2
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: lsrpl r4, r1, r2
+; V7A-NEXT: rsb r2, r3, #64
+; V7A-NEXT: lsr r1, r1, r12
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: uxtb r12, r2
+; V7A-NEXT: uxtab r2, lr, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: lsr r0, r3, r12
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: and r1, r0, r1
+; V7A-NEXT: lsrpl r3, r3, r2
+; V7A-NEXT: and r0, r3, r4
+; V7A-NEXT: pop {r4, pc}
+;
+; V7A-T-LABEL: bextr64_c1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: uxtb.w r12, r2
+; V7A-T-NEXT: lsr.w lr, r0, r12
+; V7A-T-NEXT: rsb.w r0, r12, #32
+; V7A-T-NEXT: lsl.w r0, r1, r0
+; V7A-T-NEXT: orr.w r4, lr, r0
+; V7A-T-NEXT: mvn lr, #31
+; V7A-T-NEXT: uxtab r2, lr, r2
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r4, r1, r2
+; V7A-T-NEXT: rsb.w r2, r3, #64
+; V7A-T-NEXT: lsr.w r1, r1, r12
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: uxtb.w r12, r2
+; V7A-T-NEXT: uxtab r2, lr, r2
+; V7A-T-NEXT: lsr.w r0, r3, r12
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: and.w r1, r1, r0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r3, r2
+; V7A-T-NEXT: and.w r0, r3, r4
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_c1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r3
+; V6M-NEXT: uxtb r2, r2
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r0, r0, r5
+; V6M-NEXT: uxtb r2, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r6
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %skip = zext i8 %numskipbits to i64
+ %shifted = lshr i64 %val, %skip
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %mask = lshr i64 -1, %sh_prom
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+; Same computation as bextr64_c0, except that the i64 value to extract from is
+; loaded from the pointer %w instead of being passed directly.
+define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_c2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: ldr.w r12, [sp]
+; V7M-NEXT: lsl.w r1, r3, r1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: orrs r0, r1
+; V7M-NEXT: subs.w r1, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r3, r1
+; V7M-NEXT: lsr.w r1, r3, r2
+; V7M-NEXT: rsb.w r3, r12, #64
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: rsbs.w r12, r12, #32
+; V7M-NEXT: lsr.w r3, r2, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r2, r2, r12
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_c2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r6, r8, lr}
+; V7A-NEXT: push {r4, r6, r8, lr}
+; V7A-NEXT: ldr r12, [sp, #16]
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: rsb r6, r12, #64
+; V7A-NEXT: ldr r8, [r0]
+; V7A-NEXT: mvn r0, #0
+; V7A-NEXT: rsbs r1, r12, #32
+; V7A-NEXT: lsr r6, r0, r6
+; V7A-NEXT: lsr r4, r3, r2
+; V7A-NEXT: lsrpl r0, r0, r1
+; V7A-NEXT: movwpl r6, #0
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: movwpl r4, #0
+; V7A-NEXT: and r1, r6, r4
+; V7A-NEXT: lsr r6, r8, r2
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: orr r2, r6, r3, lsl r2
+; V7A-NEXT: lsrpl r2, r3, r12
+; V7A-NEXT: and r0, r0, r2
+; V7A-NEXT: pop {r4, r6, r8, pc}
+;
+; V7A-T-LABEL: bextr64_c2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldrd r0, r3, [r0]
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: lsl.w r1, r3, r1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: orrs r0, r1
+; V7A-T-NEXT: subs.w r1, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r3, r1
+; V7A-T-NEXT: lsr.w r1, r3, r2
+; V7A-T-NEXT: rsb.w r2, r12, #64
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: rsbs.w r12, r12, #32
+; V7A-T-NEXT: lsr.w r2, r3, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r3, r3, r12
+; V7A-T-NEXT: ands r1, r2
+; V7A-T-NEXT: ands r0, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_c2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: ldr r0, [sp, #16]
+; V6M-NEXT: movs r1, #64
+; V6M-NEXT: subs r2, r1, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %val = load i64, ptr %w
+ %shifted = lshr i64 %val, %numskipbits
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+; 64-bit pattern c with the value loaded from %w and i8 bit counts:
+; (64 - %numlowbits) is computed in i8 and then zero-extended to i64 for the
+; mask shift, and the skip count is zero-extended to i64.
+define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_c3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsr.w r12, r0, r1
+; V7M-NEXT: rsb.w r0, r1, #32
+; V7M-NEXT: lsl.w r0, r3, r0
+; V7M-NEXT: orr.w r12, r12, r0
+; V7M-NEXT: subs.w r0, r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r12, r3, r0
+; V7M-NEXT: rsb.w r0, r2, #64
+; V7M-NEXT: lsr.w r1, r3, r1
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: uxtb r0, r0
+; V7M-NEXT: subs.w lr, r0, #32
+; V7M-NEXT: lsr.w r2, r3, r0
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r3, r3, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: and.w r0, r3, r12
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bextr64_c3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, lr}
+; V7A-NEXT: push {r4, lr}
+; V7A-NEXT: ldr r4, [r0]
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: uxtb r0, r1
+; V7A-NEXT: lsr r12, r4, r0
+; V7A-NEXT: rsb r4, r0, #32
+; V7A-NEXT: lsr r0, r3, r0
+; V7A-NEXT: orr lr, r12, r3, lsl r4
+; V7A-NEXT: mvn r12, #31
+; V7A-NEXT: uxtab r1, r12, r1
+; V7A-NEXT: cmp r1, #0
+; V7A-NEXT: lsrpl lr, r3, r1
+; V7A-NEXT: rsb r1, r2, #64
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: uxtb r2, r1
+; V7A-NEXT: uxtab r4, r12, r1
+; V7A-NEXT: lsr r2, r3, r2
+; V7A-NEXT: cmp r4, #0
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r1, r2, r0
+; V7A-NEXT: lsrpl r3, r3, r4
+; V7A-NEXT: and r0, r3, lr
+; V7A-NEXT: pop {r4, pc}
+;
+; V7A-T-LABEL: bextr64_c3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, r5, r7, lr}
+; V7A-T-NEXT: push {r4, r5, r7, lr}
+; V7A-T-NEXT: ldrd r12, lr, [r0]
+; V7A-T-NEXT: uxtb r0, r1
+; V7A-T-NEXT: rsb.w r3, r0, #32
+; V7A-T-NEXT: lsl.w r4, lr, r3
+; V7A-T-NEXT: lsr.w r3, r12, r0
+; V7A-T-NEXT: orr.w r5, r3, r4
+; V7A-T-NEXT: mvn r12, #31
+; V7A-T-NEXT: uxtab r1, r12, r1
+; V7A-T-NEXT: lsr.w r0, lr, r0
+; V7A-T-NEXT: cmp r1, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r5, lr, r1
+; V7A-T-NEXT: rsb.w r1, r2, #64
+; V7A-T-NEXT: mov.w r4, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: uxtb r2, r1
+; V7A-T-NEXT: uxtab r3, r12, r1
+; V7A-T-NEXT: lsr.w r2, r4, r2
+; V7A-T-NEXT: cmp r3, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: and.w r1, r2, r0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r4, r3
+; V7A-T-NEXT: and.w r0, r4, r5
+; V7A-T-NEXT: pop {r4, r5, r7, pc}
+;
+; V6M-LABEL: bextr64_c3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r2
+; V6M-NEXT: ldr r4, [r0]
+; V6M-NEXT: ldr r3, [r0, #4]
+; V6M-NEXT: uxtb r2, r1
+; V6M-NEXT: mov r0, r4
+; V6M-NEXT: mov r1, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r6, r0
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r0, r0, r5
+; V6M-NEXT: uxtb r2, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r6
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %val = load i64, ptr %w
+ %skip = zext i8 %numskipbits to i64
+ %shifted = lshr i64 %val, %skip
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %mask = lshr i64 -1, %sh_prom
+ %masked = and i64 %mask, %shifted
+ ret i64 %masked
+}
+
+; Same computation as bextr64_c0 with the operands of the final `and` swapped
+; (shifted value first, mask second). In the V7A expectation this shows up
+; only as swapped source operands on the two `and` instructions.
+define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_c4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: ldr.w r12, [sp]
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: rsb.w r3, r12, #64
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: lsr.w r3, r2, r3
+; V7M-NEXT: rsbs.w r12, r12, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r2, r2, r12
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_c4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr r12, [sp, #16]
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r5, r1, r2
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r4, r12, #64
+; V7A-NEXT: rsbs lr, r12, #32
+; V7A-NEXT: lsr r4, r3, r4
+; V7A-NEXT: lsrpl r3, r3, lr
+; V7A-NEXT: movwpl r4, #0
+; V7A-NEXT: subs lr, r2, #32
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: movwpl r5, #0
+; V7A-NEXT: and r12, r5, r4
+; V7A-NEXT: orr r0, r0, r1, lsl r2
+; V7A-NEXT: lsrpl r0, r1, lr
+; V7A-NEXT: mov r1, r12
+; V7A-NEXT: and r0, r0, r3
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_c4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: mov.w lr, #-1
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orrs r0, r3
+; V7A-T-NEXT: subs.w r3, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r3
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: rsbs.w r2, r12, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r3, r2
+; V7A-T-NEXT: rsb.w r2, r12, #64
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: lsr.w r2, lr, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: ands r1, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bextr64_c4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: ldr r0, [sp, #16]
+; V6M-NEXT: movs r1, #64
+; V6M-NEXT: subs r2, r1, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %shifted, %mask ; swapped order
+ ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+; Returns trunc-to-i32 of (-1 >>u (64 - %numlowbits)) & (%val >>u %numskipbits).
+; The shift, the mask and the `and` are all i64; only the final result is
+; narrowed, so the expected code computes just the low word of the mask.
+define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_32_c0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: rsbs.w r1, r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl r2, r1
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_c0:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r3, [sp]
+; V7A-NEXT: rsbs r12, r3, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsrpl r3, r3, r12
+; V7A-NEXT: lsr r12, r0, r2
+; V7A-NEXT: rsb r0, r2, #32
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r12, r1, lsl r0
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_c0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: rsbs.w r1, r12, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r2, r1
+; V7A-T-NEXT: ands r0, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_c0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: ldr r0, [sp, #8]
+; V6M-NEXT: movs r1, #64
+; V6M-NEXT: subs r2, r1, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %mask, %shifted
+ %res = trunc i64 %masked to i32
+ ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+; The logical right shift by %numskipbits is done on i64 and truncated to i32;
+; the mask -1 >>u (32 - %numlowbits) is then built and applied in i32. The
+; expected code ends with a left/right shift pair by (32 - %numlowbits).
+define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_32_c1:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_c1:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: rsb r1, r12, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_c1:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: rsb.w r1, r12, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_c1:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r1, [sp, #8]
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: pop {r7, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %truncshifted = trunc i64 %shifted to i32
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %mask, %truncshifted
+ ret i32 %masked
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+; The mask -1 >>u (32 - %numlowbits) is built in i32 and zero-extended to i64,
+; and-ed with the i64 value (%val >>u %numskipbits), and the result is
+; truncated to i32. The expected code is the same as for bextr64_32_c1.
+define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr64_32_c2:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_c2:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: rsb r1, r12, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_c2:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: rsb.w r1, r12, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_c2:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r1, [sp, #8]
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: pop {r7, pc}
+ %shifted = lshr i64 %val, %numskipbits
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %zextmask = zext i32 %mask to i64
+ %masked = and i64 %zextmask, %shifted
+ %truncmasked = trunc i64 %masked to i32
+ ret i32 %truncmasked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern d. 32-bit.
+; ---------------------------------------------------------------------------- ;
+
+; Pattern d baseline: no explicit mask. After the logical right shift by
+; %numskipbits, the high bits are cleared by shifting left and then logically
+; right by (32 - %numlowbits). The expected code matches the shift pair that
+; bextr32_c0 is lowered to.
+define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
+; V7M-LABEL: bextr32_d0:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_d0:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_d0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_d0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #32
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: lsrs r0, r2
+; V6M-NEXT: bx lr
+ %shifted = lshr i32 %val, %numskipbits
+ %numhighbits = sub i32 32, %numlowbits
+ %highbitscleared = shl i32 %shifted, %numhighbits
+ %masked = lshr i32 %highbitscleared, %numhighbits
+ ret i32 %masked
+}
+
+define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; Pattern d, i32 value with i8 bit counts that are zero-extended before shifting.
+; V7M-LABEL: bextr32_d1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_d1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_d1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_d1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #32
+; V6M-NEXT: subs r1, r1, r2
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %skip = zext i8 %numskipbits to i32 ; widen the i8 skip count to the shift width
+ %shifted = lshr i32 %val, %skip ; drop the low %skip bits
+ %numhighbits = sub i8 32, %numlowbits ; subtraction is performed in i8, before widening
+ %sh_prom = zext i8 %numhighbits to i32 ; zero-extend the i8 difference for use as the shift amount
+ %highbitscleared = shl i32 %shifted, %sh_prom ; push the unwanted high bits out of the top
+ %masked = lshr i32 %highbitscleared, %sh_prom ; shift back down by the same amount; zeros fill the top
+ ret i32 %masked
+}
+
+define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; Pattern d, i32, with the source value loaded from memory instead of passed by value.
+; V7M-LABEL: bextr32_d2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_d2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_d2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_d2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r3, #32
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r2
+; V6M-NEXT: lsrs r0, r2
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w ; the value to extract from is read through %w
+ %shifted = lshr i32 %val, %numskipbits ; drop the low %numskipbits bits
+ %numhighbits = sub i32 32, %numlowbits ; 32 - %numlowbits = count of high bits to clear
+ %highbitscleared = shl i32 %shifted, %numhighbits ; push the unwanted high bits out of the top
+ %masked = lshr i32 %highbitscleared, %numhighbits ; shift back down by the same amount; zeros fill the top
+ ret i32 %masked
+}
+
+define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; Pattern d, i32: loaded source value combined with zero-extended i8 bit counts.
+; V7M-LABEL: bextr32_d3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr32_d3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: rsb r1, r2, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr32_d3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr32_d3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: movs r1, #32
+; V6M-NEXT: subs r1, r1, r2
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w ; the value to extract from is read through %w
+ %skip = zext i8 %numskipbits to i32 ; widen the i8 skip count to the shift width
+ %shifted = lshr i32 %val, %skip ; drop the low %skip bits
+ %numhighbits = sub i8 32, %numlowbits ; subtraction is performed in i8, before widening
+ %sh_prom = zext i8 %numhighbits to i32 ; zero-extend the i8 difference for use as the shift amount
+ %highbitscleared = shl i32 %shifted, %sh_prom ; push the unwanted high bits out of the top
+ %masked = lshr i32 %highbitscleared, %sh_prom ; shift back down by the same amount; zeros fill the top
+ ret i32 %masked
+}
+
+; 64-bit.
+
+define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; Pattern d, i64: shift %val right, then clear the high bits with a 64-bit shl/lshr pair.
+; V7M-LABEL: bextr64_d0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: rsb.w r3, r12, #64
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: rsb.w lr, r12, #32
+; V7M-NEXT: rsb.w r12, r3, #32
+; V7M-NEXT: lsls r1, r3
+; V7M-NEXT: cmp.w lr, #0
+; V7M-NEXT: lsr.w r4, r0, r12
+; V7M-NEXT: orr.w r1, r1, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, lr
+; V7M-NEXT: lsl.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r2, r1, r12
+; V7M-NEXT: lsr.w r0, r0, r3
+; V7M-NEXT: orr.w r0, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, lr
+; V7M-NEXT: lsr.w r1, r1, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_d0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: lsr r3, r1, r2
+; V7A-NEXT: subs lr, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: movwpl r3, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r2
+; V7A-NEXT: lsrpl r0, r1, lr
+; V7A-NEXT: rsb r1, r12, #64
+; V7A-NEXT: rsb lr, r1, #32
+; V7A-NEXT: lsr r2, r0, lr
+; V7A-NEXT: orr r2, r2, r3, lsl r1
+; V7A-NEXT: rsbs r3, r12, #32
+; V7A-NEXT: lslpl r2, r0, r3
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: lsr r1, r2, r1
+; V7A-NEXT: orr r0, r0, r2, lsl lr
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: lsrpl r0, r2, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_d0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orrs r0, r3
+; V7A-T-NEXT: subs.w r3, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r3
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: rsb.w r3, r12, #64
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: lsls r1, r3
+; V7A-T-NEXT: rsbs.w r2, r12, #32
+; V7A-T-NEXT: lsr.w r4, r0, lr
+; V7A-T-NEXT: orr.w r1, r1, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r1, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r4, r1, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: lsr.w r1, r1, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_d0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r2, [sp, #8]
+; V6M-NEXT: movs r3, #64
+; V6M-NEXT: subs r4, r3, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %shifted = lshr i64 %val, %numskipbits ; drop the low %numskipbits bits
+ %numhighbits = sub i64 64, %numlowbits ; 64 - %numlowbits = count of high bits to clear
+ %highbitscleared = shl i64 %shifted, %numhighbits ; push the unwanted high bits out of the top
+ %masked = lshr i64 %highbitscleared, %numhighbits ; shift back down by the same amount; zeros fill the top
+ ret i64 %masked
+}
+
+define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; Pattern d, i64 value with i8 bit counts that are zero-extended before shifting.
+; V7M-LABEL: bextr64_d1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: uxtb.w lr, r2
+; V7M-NEXT: subs.w r2, lr, #32
+; V7M-NEXT: lsr.w r12, r0, lr
+; V7M-NEXT: rsb.w r0, lr, #32
+; V7M-NEXT: lsl.w r0, r1, r0
+; V7M-NEXT: orr.w r0, r0, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: rsb.w r2, r3, #64
+; V7M-NEXT: lsr.w r1, r1, lr
+; V7M-NEXT: uxtb r2, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: sub.w r12, r2, #32
+; V7M-NEXT: lsr.w r4, r0, r3
+; V7M-NEXT: orrs r1, r4
+; V7M-NEXT: cmp.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, r12
+; V7M-NEXT: lsl.w r0, r0, r2
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsr.w r0, r0, r2
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r12
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_d1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: uxtb r12, r2
+; V7A-NEXT: lsr lr, r0, r12
+; V7A-NEXT: rsb r0, r12, #32
+; V7A-NEXT: orr r0, lr, r1, lsl r0
+; V7A-NEXT: mvn lr, #31
+; V7A-NEXT: uxtab r2, lr, r2
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: rsb r2, r3, #64
+; V7A-NEXT: lsr r1, r1, r12
+; V7A-NEXT: uxtb r3, r2
+; V7A-NEXT: rsb r4, r3, #32
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: uxtab r2, lr, r2
+; V7A-NEXT: lsr r5, r0, r4
+; V7A-NEXT: orr r1, r5, r1, lsl r3
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: lslpl r1, r0, r2
+; V7A-NEXT: lsl r0, r0, r3
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r3
+; V7A-NEXT: orr r0, r0, r1, lsl r4
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: lsr r1, r1, r3
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_d1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, r5, r6, r7, lr}
+; V7A-T-NEXT: push {r4, r5, r6, r7, lr}
+; V7A-T-NEXT: uxtb.w r12, r2
+; V7A-T-NEXT: rsb.w r6, r12, #32
+; V7A-T-NEXT: rsb.w r3, r3, #64
+; V7A-T-NEXT: lsr.w r0, r0, r12
+; V7A-T-NEXT: mvn r7, #31
+; V7A-T-NEXT: uxtab r2, r7, r2
+; V7A-T-NEXT: lsl.w r6, r1, r6
+; V7A-T-NEXT: lsr.w lr, r1, r12
+; V7A-T-NEXT: orrs r0, r6
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w lr, #0
+; V7A-T-NEXT: uxtb r5, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: rsb.w r1, r5, #32
+; V7A-T-NEXT: uxtab r3, r7, r3
+; V7A-T-NEXT: lsl.w r4, lr, r5
+; V7A-T-NEXT: lsr.w r2, r0, r1
+; V7A-T-NEXT: cmp r3, #0
+; V7A-T-NEXT: orr.w r2, r2, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r2, r0, r3
+; V7A-T-NEXT: lsl.w r0, r0, r5
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: lsr.w r0, r0, r5
+; V7A-T-NEXT: orr.w r0, r0, r1
+; V7A-T-NEXT: lsr.w r1, r2, r5
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r2, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; V6M-LABEL: bextr64_d1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: mov r4, r3
+; V6M-NEXT: uxtb r2, r2
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: movs r2, #64
+; V6M-NEXT: subs r2, r2, r4
+; V6M-NEXT: uxtb r4, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %skip = zext i8 %numskipbits to i64 ; widen the i8 skip count to the shift width
+ %shifted = lshr i64 %val, %skip ; drop the low %skip bits
+ %numhighbits = sub i8 64, %numlowbits ; subtraction is performed in i8, before widening
+ %sh_prom = zext i8 %numhighbits to i64 ; zero-extend the i8 difference for use as the shift amount
+ %highbitscleared = shl i64 %shifted, %sh_prom ; push the unwanted high bits out of the top
+ %masked = lshr i64 %highbitscleared, %sh_prom ; shift back down by the same amount; zeros fill the top
+ ret i64 %masked
+}
+
+define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; Pattern d, i64, with the source value loaded from memory instead of passed by value.
+; V7M-LABEL: bextr64_d2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsl.w r1, r3, r1
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: rsb.w lr, r12, #32
+; V7M-NEXT: orrs r0, r1
+; V7M-NEXT: subs.w r1, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r3, r1
+; V7M-NEXT: rsb.w r1, r12, #64
+; V7M-NEXT: lsr.w r2, r3, r2
+; V7M-NEXT: rsb.w r12, r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: cmp.w lr, #0
+; V7M-NEXT: lsl.w r2, r2, r1
+; V7M-NEXT: lsr.w r4, r0, r12
+; V7M-NEXT: orr.w r2, r2, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r0, lr
+; V7M-NEXT: lsl.w r0, r0, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r3, r2, r12
+; V7M-NEXT: lsr.w r0, r0, r1
+; V7M-NEXT: lsr.w r1, r2, r1
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r2, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_d2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: ldrd r0, r1, [r0]
+; V7A-NEXT: subs lr, r2, #32
+; V7A-NEXT: lsr r3, r1, r2
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: movwpl r3, #0
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r2
+; V7A-NEXT: lsrpl r0, r1, lr
+; V7A-NEXT: rsb r1, r12, #64
+; V7A-NEXT: rsb lr, r1, #32
+; V7A-NEXT: lsr r2, r0, lr
+; V7A-NEXT: orr r2, r2, r3, lsl r1
+; V7A-NEXT: rsbs r3, r12, #32
+; V7A-NEXT: lslpl r2, r0, r3
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: lsr r1, r2, r1
+; V7A-NEXT: orr r0, r0, r2, lsl lr
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: lsrpl r0, r2, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_d2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: ldrd r0, r3, [r0]
+; V7A-T-NEXT: rsb.w r1, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: lsl.w r1, r3, r1
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: orrs r0, r1
+; V7A-T-NEXT: subs.w r1, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r3, r1
+; V7A-T-NEXT: lsr.w r2, r3, r2
+; V7A-T-NEXT: rsb.w r1, r12, #64
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: rsb.w lr, r1, #32
+; V7A-T-NEXT: rsbs.w r3, r12, #32
+; V7A-T-NEXT: lsl.w r2, r2, r1
+; V7A-T-NEXT: lsr.w r4, r0, lr
+; V7A-T-NEXT: orr.w r2, r2, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r2, r0, r3
+; V7A-T-NEXT: lsl.w r0, r0, r1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r4, r2, lr
+; V7A-T-NEXT: lsr.w r0, r0, r1
+; V7A-T-NEXT: lsr.w r1, r2, r1
+; V7A-T-NEXT: orr.w r0, r0, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r2, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_d2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: ldr r3, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r2, [sp, #8]
+; V6M-NEXT: movs r3, #64
+; V6M-NEXT: subs r4, r3, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w ; the value to extract from is read through %w
+ %shifted = lshr i64 %val, %numskipbits ; drop the low %numskipbits bits
+ %numhighbits = sub i64 64, %numlowbits ; 64 - %numlowbits = count of high bits to clear
+ %highbitscleared = shl i64 %shifted, %numhighbits ; push the unwanted high bits out of the top
+ %masked = lshr i64 %highbitscleared, %numhighbits ; shift back down by the same amount; zeros fill the top
+ ret i64 %masked
+}
+
+define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; Pattern d, i64: loaded source value combined with zero-extended i8 bit counts.
+; V7M-LABEL: bextr64_d3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: ldrd r0, lr, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: subs.w r3, r1, #32
+; V7M-NEXT: lsr.w r12, r0, r1
+; V7M-NEXT: rsb.w r0, r1, #32
+; V7M-NEXT: lsr.w r1, lr, r1
+; V7M-NEXT: uxtb r2, r2
+; V7M-NEXT: lsl.w r0, lr, r0
+; V7M-NEXT: orr.w r0, r0, r12
+; V7M-NEXT: sub.w r12, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, lr, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsls r1, r2
+; V7M-NEXT: cmp.w r12, #0
+; V7M-NEXT: lsr.w r4, r0, r3
+; V7M-NEXT: orr.w r1, r1, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, r12
+; V7M-NEXT: lsl.w r0, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: lsr.w r0, r0, r2
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r12
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_d3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r5, r11, lr}
+; V7A-NEXT: push {r4, r5, r11, lr}
+; V7A-NEXT: ldr r4, [r0]
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: uxtb r0, r1
+; V7A-NEXT: lsr r12, r4, r0
+; V7A-NEXT: rsb r4, r0, #32
+; V7A-NEXT: lsr r0, r3, r0
+; V7A-NEXT: orr r4, r12, r3, lsl r4
+; V7A-NEXT: mvn r12, #31
+; V7A-NEXT: uxtab r1, r12, r1
+; V7A-NEXT: cmp r1, #0
+; V7A-NEXT: lsrpl r4, r3, r1
+; V7A-NEXT: rsb r1, r2, #64
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: uxtb r2, r1
+; V7A-NEXT: rsb lr, r2, #32
+; V7A-NEXT: uxtab r1, r12, r1
+; V7A-NEXT: lsr r5, r4, lr
+; V7A-NEXT: orr r3, r5, r0, lsl r2
+; V7A-NEXT: cmp r1, #0
+; V7A-NEXT: lsl r0, r4, r2
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lslpl r3, r4, r1
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: orr r0, r0, r3, lsl lr
+; V7A-NEXT: lsrpl r0, r3, r1
+; V7A-NEXT: lsr r1, r3, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r4, r5, r11, pc}
+;
+; V7A-T-LABEL: bextr64_d3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, r5, r6, lr}
+; V7A-T-NEXT: push {r4, r5, r6, lr}
+; V7A-T-NEXT: ldrd r12, lr, [r0]
+; V7A-T-NEXT: uxtb r0, r1
+; V7A-T-NEXT: rsb.w r6, r0, #32
+; V7A-T-NEXT: lsr.w r3, lr, r0
+; V7A-T-NEXT: rsb.w r2, r2, #64
+; V7A-T-NEXT: mvn r4, #31
+; V7A-T-NEXT: lsr.w r0, r12, r0
+; V7A-T-NEXT: uxtab r1, r4, r1
+; V7A-T-NEXT: lsl.w r6, lr, r6
+; V7A-T-NEXT: orrs r0, r6
+; V7A-T-NEXT: cmp r1, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: uxtb r5, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, lr, r1
+; V7A-T-NEXT: rsb.w r1, r5, #32
+; V7A-T-NEXT: lsls r3, r5
+; V7A-T-NEXT: uxtab r2, r4, r2
+; V7A-T-NEXT: lsr.w r6, r0, r1
+; V7A-T-NEXT: orrs r3, r6
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r5
+; V7A-T-NEXT: lsl.w r1, r3, r1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsr.w r0, r0, r5
+; V7A-T-NEXT: orr.w r0, r0, r1
+; V7A-T-NEXT: lsr.w r1, r3, r5
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r3, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, r5, r6, pc}
+;
+; V6M-LABEL: bextr64_d3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r2
+; V6M-NEXT: ldr r5, [r0]
+; V6M-NEXT: ldr r3, [r0, #4]
+; V6M-NEXT: uxtb r2, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r3
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: movs r2, #64
+; V6M-NEXT: subs r2, r2, r4
+; V6M-NEXT: uxtb r4, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %val = load i64, ptr %w ; the value to extract from is read through %w
+ %skip = zext i8 %numskipbits to i64 ; widen the i8 skip count to the shift width
+ %shifted = lshr i64 %val, %skip ; drop the low %skip bits
+ %numhighbits = sub i8 64, %numlowbits ; subtraction is performed in i8, before widening
+ %sh_prom = zext i8 %numhighbits to i64 ; zero-extend the i8 difference for use as the shift amount
+ %highbitscleared = shl i64 %shifted, %sh_prom ; push the unwanted high bits out of the top
+ %masked = lshr i64 %highbitscleared, %sh_prom ; shift back down by the same amount; zeros fill the top
+ ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; Pattern d on i64 operands; only the low 32 bits of the 64-bit result are returned.
+; V7M-LABEL: bextr64_32_d0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r4, lr}
+; V7M-NEXT: push {r4, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: ldr.w r12, [sp, #8]
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orrs r0, r3
+; V7M-NEXT: subs.w r3, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r3
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: rsb.w r3, r12, #64
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: rsb.w lr, r12, #32
+; V7M-NEXT: rsb.w r12, r3, #32
+; V7M-NEXT: lsls r1, r3
+; V7M-NEXT: cmp.w lr, #0
+; V7M-NEXT: lsr.w r4, r0, r12
+; V7M-NEXT: orr.w r1, r1, r4
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, lr
+; V7M-NEXT: lsl.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r2, r1, r12
+; V7M-NEXT: lsr.w r0, r0, r3
+; V7M-NEXT: orr.w r0, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, lr
+; V7M-NEXT: pop {r4, pc}
+;
+; V7A-LABEL: bextr64_32_d0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: lsr r3, r1, r2
+; V7A-NEXT: subs lr, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: rsb r2, r2, #32
+; V7A-NEXT: ldr r12, [sp, #8]
+; V7A-NEXT: movwpl r3, #0
+; V7A-NEXT: orr r0, r0, r1, lsl r2
+; V7A-NEXT: lsrpl r0, r1, lr
+; V7A-NEXT: rsb r1, r12, #64
+; V7A-NEXT: rsb lr, r1, #32
+; V7A-NEXT: lsr r2, r0, lr
+; V7A-NEXT: orr r2, r2, r3, lsl r1
+; V7A-NEXT: rsbs r3, r12, #32
+; V7A-NEXT: lslpl r2, r0, r3
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: orr r0, r0, r2, lsl lr
+; V7A-NEXT: lsrpl r0, r2, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bextr64_32_d0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: ldr.w r12, [sp, #8]
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orrs r0, r3
+; V7A-T-NEXT: subs.w r3, r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r3
+; V7A-T-NEXT: lsr.w r1, r1, r2
+; V7A-T-NEXT: rsb.w r3, r12, #64
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: lsls r1, r3
+; V7A-T-NEXT: rsbs.w r2, r12, #32
+; V7A-T-NEXT: lsr.w r4, r0, lr
+; V7A-T-NEXT: orr.w r1, r1, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r1, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r4, r1, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bextr64_32_d0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r2, [sp, #8]
+; V6M-NEXT: movs r3, #64
+; V6M-NEXT: subs r4, r3, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %shifted = lshr i64 %val, %numskipbits ; drop the low %numskipbits bits
+ %numhighbits = sub i64 64, %numlowbits ; 64 - %numlowbits = count of high bits to clear
+ %highbitscleared = shl i64 %shifted, %numhighbits ; push the unwanted high bits out of the top
+ %masked = lshr i64 %highbitscleared, %numhighbits ; shift back down by the same amount; zeros fill the top
+ %res = trunc i64 %masked to i32 ; narrowing is the very last step
+ ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; Pattern d with an i64 skip shift, a truncation, and then the shl/lshr clearing done in i32.
+; V7M-LABEL: bextr64_32_d1:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsrs r0, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: ldr r1, [sp]
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bextr64_32_d1:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r3, r2, #32
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: ldr r12, [sp]
+; V7A-NEXT: subs r2, r2, #32
+; V7A-NEXT: orr r0, r0, r1, lsl r3
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: rsb r1, r12, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bextr64_32_d1:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: lsrs r0, r2
+; V7A-T-NEXT: ldr.w r12, [sp]
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: lsl.w r3, r1, r3
+; V7A-T-NEXT: orr.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: rsb.w r1, r12, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bextr64_32_d1:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r7, lr}
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldr r1, [sp, #8]
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: pop {r7, pc}
+ %shifted = lshr i64 %val, %numskipbits ; 64-bit shift drops the low %numskipbits bits
+ %truncshifted = trunc i64 %shifted to i32 ; narrow before masking; everything below is i32
+ %numhighbits = sub i32 32, %numlowbits ; 32 - %numlowbits = count of high bits to clear
+ %highbitscleared = shl i32 %truncshifted, %numhighbits ; push the unwanted high bits out of the top
+ %masked = lshr i32 %highbitscleared, %numhighbits ; shift back down by the same amount; zeros fill the top
+ ret i32 %masked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Constant
+; ---------------------------------------------------------------------------- ;
+
+; https://bugs.llvm.org/show_bug.cgi?id=38938
+define void @pr38938(ptr %a0, ptr %a1) nounwind { ; Constant bit-field extract from a loaded i64, used as an index to increment an i32 slot under %a0.
+; V7M-LABEL: pr38938:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r1, [r1]
+; V7M-NEXT: ubfx r1, r1, #21, #10
+; V7M-NEXT: ldr.w r2, [r0, r1, lsl #2]
+; V7M-NEXT: adds r2, #1
+; V7M-NEXT: str.w r2, [r0, r1, lsl #2]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: pr38938:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r1, [r1]
+; V7A-NEXT: ubfx r1, r1, #21, #10
+; V7A-NEXT: ldr r2, [r0, r1, lsl #2]
+; V7A-NEXT: add r2, r2, #1
+; V7A-NEXT: str r2, [r0, r1, lsl #2]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: pr38938:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r1, [r1]
+; V7A-T-NEXT: ubfx r1, r1, #21, #10
+; V7A-T-NEXT: ldr.w r2, [r0, r1, lsl #2]
+; V7A-T-NEXT: adds r2, #1
+; V7A-T-NEXT: str.w r2, [r0, r1, lsl #2]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: pr38938:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, [r1]
+; V6M-NEXT: lsrs r1, r1, #19
+; V6M-NEXT: ldr r2, .LCPI51_0
+; V6M-NEXT: ands r2, r1
+; V6M-NEXT: ldr r1, [r0, r2]
+; V6M-NEXT: adds r1, r1, #1
+; V6M-NEXT: str r1, [r0, r2]
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI51_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp = load i64, ptr %a1, align 8 ; 64-bit source word
+ %tmp1 = lshr i64 %tmp, 21 ; constant skip of 21 bits
+ %tmp2 = and i64 %tmp1, 1023 ; 1023 = 0x3ff: keep 10 bits, i.e. bits 21..30 of %tmp
+ %tmp3 = getelementptr inbounds i32, ptr %a0, i64 %tmp2 ; the extracted field indexes i32 elements from %a0
+ %tmp4 = load i32, ptr %tmp3, align 4 ; read the selected element
+ %tmp5 = add nsw i32 %tmp4, 1 ; increment it
+ store i32 %tmp5, ptr %tmp3, align 4 ; write it back to the same slot
+ ret void
+}
+
+; The most canonical variant
+define i32 @c0_i32(i32 %arg) nounwind { ; Constant extract: lshr by 19 followed by a low 10-bit mask.
+; V7M-LABEL: c0_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: ubfx r0, r0, #19, #10
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c0_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: ubfx r0, r0, #19, #10
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c0_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ubfx r0, r0, #19, #10
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c0_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsls r0, r0, #3
+; V6M-NEXT: lsrs r0, r0, #22
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i32 %arg, 19 ; constant skip of 19 bits
+ %tmp1 = and i32 %tmp0, 1023 ; 1023 = 0x3ff: keep 10 bits, i.e. bits 19..28 of %arg
+ ret i32 %tmp1
+}
+
+; Should be still fine, but the mask is shifted
+define i32 @c1_i32(i32 %arg) nounwind { ; Constant extract where the 10-bit mask does not start at bit 0.
+; V7M-LABEL: c1_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: movw r1, #4092
+; V7M-NEXT: and.w r0, r1, r0, lsr #19
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c1_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r1, #4092
+; V7A-NEXT: and r0, r1, r0, lsr #19
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c1_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movw r1, #4092
+; V7A-T-NEXT: and.w r0, r1, r0, lsr #19
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c1_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r1, r0, #19
+; V6M-NEXT: ldr r0, .LCPI53_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI53_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i32 %arg, 19 ; constant skip of 19 bits
+ %tmp1 = and i32 %tmp0, 4092 ; 4092 = 0xffc = 1023 << 2: ten mask bits at positions 2..11
+ ret i32 %tmp1
+}
+
+; Should be still fine, but the result is shifted left afterwards
+define i32 @c2_i32(i32 %arg) nounwind { ; Constant extract of 10 bits whose result is then shifted left by 2.
+; V7M-LABEL: c2_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: movw r1, #4092
+; V7M-NEXT: and.w r0, r1, r0, lsr #17
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c2_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r1, #4092
+; V7A-NEXT: and r0, r1, r0, lsr #17
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c2_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movw r1, #4092
+; V7A-T-NEXT: and.w r0, r1, r0, lsr #17
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c2_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r1, r0, #17
+; V6M-NEXT: ldr r0, .LCPI54_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI54_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i32 %arg, 19 ; constant skip of 19 bits
+ %tmp1 = and i32 %tmp0, 1023 ; 1023 = 0x3ff: keep 10 bits, i.e. bits 19..28 of %arg
+ %tmp2 = shl i32 %tmp1, 2 ; reposition the field to bits 2..11; equals (%arg >> 17) & 4092, as the checks show
+ ret i32 %tmp2
+}
+
+; The mask covers a newly shifted-in bit
+define i32 @c4_i32_bad(i32 %arg) nounwind { ; Negative case: the mask reaches past the 13 bits that survive the lshr.
+; V7M-LABEL: c4_i32_bad:
+; V7M: @ %bb.0:
+; V7M-NEXT: mvn r1, #1
+; V7M-NEXT: and.w r0, r1, r0, lsr #19
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c4_i32_bad:
+; V7A: @ %bb.0:
+; V7A-NEXT: mvn r1, #1
+; V7A-NEXT: and r0, r1, r0, lsr #19
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c4_i32_bad:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mvn r1, #1
+; V7A-T-NEXT: and.w r0, r1, r0, lsr #19
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c4_i32_bad:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r0, #20
+; V6M-NEXT: lsls r0, r0, #1
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i32 %arg, 19 ; only bits 0..12 of the result can be nonzero
+ %tmp1 = and i32 %tmp0, 16382 ; 16382 = 0x3ffe: mask bits 1..13, so bit 13 lands on a shifted-in zero
+ ret i32 %tmp1
+}
+
+; i64
+
+; The most canonical variant
+define i64 @c0_i64(i64 %arg) nounwind { ; i64 constant extract: lshr by 51 followed by a low 10-bit mask.
+; V7M-LABEL: c0_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: ubfx r0, r1, #19, #10
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c0_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: ubfx r0, r1, #19, #10
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c0_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ubfx r0, r1, #19, #10
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c0_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsls r0, r1, #3
+; V6M-NEXT: lsrs r0, r0, #22
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i64 %arg, 51 ; constant skip of 51 bits
+ %tmp1 = and i64 %tmp0, 1023 ; 1023 = 0x3ff: keep 10 bits, i.e. bits 51..60 of %arg
+ ret i64 %tmp1
+}
+
+; Should be still fine, but the mask is shifted
+define i64 @c1_i64(i64 %arg) nounwind { ; i64 constant extract where the 10-bit mask does not start at bit 0.
+; V7M-LABEL: c1_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: movw r0, #4092
+; V7M-NEXT: and.w r0, r0, r1, lsr #19
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c1_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r0, #4092
+; V7A-NEXT: and r0, r0, r1, lsr #19
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c1_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movw r0, #4092
+; V7A-T-NEXT: and.w r0, r0, r1, lsr #19
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c1_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r1, r1, #19
+; V6M-NEXT: ldr r0, .LCPI57_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI57_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i64 %arg, 51 ; constant skip of 51 bits
+ %tmp1 = and i64 %tmp0, 4092 ; 4092 = 0xffc = 1023 << 2: ten mask bits at positions 2..11
+ ret i64 %tmp1
+}
+
+; Should be still fine, but the result is shifted left afterwards
+define i64 @c2_i64(i64 %arg) nounwind { ; i64 constant extract of 10 bits whose result is then shifted left by 2.
+; V7M-LABEL: c2_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: movw r0, #4092
+; V7M-NEXT: and.w r0, r0, r1, lsr #17
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c2_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r0, #4092
+; V7A-NEXT: and r0, r0, r1, lsr #17
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c2_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movw r0, #4092
+; V7A-T-NEXT: and.w r0, r0, r1, lsr #17
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c2_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r1, r1, #17
+; V6M-NEXT: ldr r0, .LCPI58_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI58_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i64 %arg, 51 ; constant skip of 51 bits
+ %tmp1 = and i64 %tmp0, 1023 ; 1023 = 0x3ff: keep 10 bits, i.e. bits 51..60 of %arg
+ %tmp2 = shl i64 %tmp1, 2 ; reposition the field to bits 2..11 of the result
+ ret i64 %tmp2
+}
+
+; The mask covers a newly shifted-in bit
+define i64 @c4_i64_bad(i64 %arg) nounwind { ; i64 negative case: the mask reaches past the 13 bits that survive the lshr.
+; V7M-LABEL: c4_i64_bad:
+; V7M: @ %bb.0:
+; V7M-NEXT: mvn r0, #1
+; V7M-NEXT: and.w r0, r0, r1, lsr #19
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c4_i64_bad:
+; V7A: @ %bb.0:
+; V7A-NEXT: mvn r0, #1
+; V7A-NEXT: and r0, r0, r1, lsr #19
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c4_i64_bad:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mvn r0, #1
+; V7A-T-NEXT: and.w r0, r0, r1, lsr #19
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c4_i64_bad:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1, #20
+; V6M-NEXT: lsls r0, r0, #1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i64 %arg, 51 ; only bits 0..12 of the result can be nonzero
+ %tmp1 = and i64 %tmp0, 16382 ; 16382 = 0x3ffe: mask bits 1..13, so bit 13 lands on a shifted-in zero
+ ret i64 %tmp1
+}
+
+; ---------------------------------------------------------------------------- ;
+; Constant, storing the result afterwards.
+; ---------------------------------------------------------------------------- ;
+
+; i32
+
+; The most canonical variant
+define void @c5_i32(i32 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c5_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: ubfx r0, r0, #19, #10
+; V7M-NEXT: str r0, [r1]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c5_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: ubfx r0, r0, #19, #10
+; V7A-NEXT: str r0, [r1]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c5_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ubfx r0, r0, #19, #10
+; V7A-T-NEXT: str r0, [r1]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c5_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsls r0, r0, #3
+; V6M-NEXT: lsrs r0, r0, #22
+; V6M-NEXT: str r0, [r1]
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i32 %arg, 19
+ %tmp1 = and i32 %tmp0, 1023
+ store i32 %tmp1, ptr %ptr
+ ret void
+}
+
+; Should still be fine, but the mask is wider (12 low bits, not shifted)
+define void @c6_i32(i32 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c6_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: ubfx r0, r0, #19, #12
+; V7M-NEXT: str r0, [r1]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c6_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: ubfx r0, r0, #19, #12
+; V7A-NEXT: str r0, [r1]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c6_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ubfx r0, r0, #19, #12
+; V7A-T-NEXT: str r0, [r1]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c6_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsls r0, r0, #1
+; V6M-NEXT: lsrs r0, r0, #20
+; V6M-NEXT: str r0, [r1]
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i32 %arg, 19
+ %tmp1 = and i32 %tmp0, 4095
+ store i32 %tmp1, ptr %ptr
+ ret void
+}
+
+; Should be still fine, but the result is shifted left afterwards
+define void @c7_i32(i32 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c7_i32:
+; V7M: @ %bb.0:
+; V7M-NEXT: movw r2, #4092
+; V7M-NEXT: and.w r0, r2, r0, lsr #17
+; V7M-NEXT: str r0, [r1]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c7_i32:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r2, #4092
+; V7A-NEXT: and r0, r2, r0, lsr #17
+; V7A-NEXT: str r0, [r1]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c7_i32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movw r2, #4092
+; V7A-T-NEXT: and.w r0, r2, r0, lsr #17
+; V7A-T-NEXT: str r0, [r1]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c7_i32:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r0, #17
+; V6M-NEXT: ldr r2, .LCPI62_0
+; V6M-NEXT: ands r2, r0
+; V6M-NEXT: str r2, [r1]
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI62_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i32 %arg, 19
+ %tmp1 = and i32 %tmp0, 1023
+ %tmp2 = shl i32 %tmp1, 2
+ store i32 %tmp2, ptr %ptr
+ ret void
+}
+
+; i64
+
+; The most canonical variant
+define void @c5_i64(i64 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c5_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r0, #0
+; V7M-NEXT: ubfx r1, r1, #19, #10
+; V7M-NEXT: strd r1, r0, [r2]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c5_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r0, #0
+; V7A-NEXT: str r0, [r2, #4]
+; V7A-NEXT: ubfx r0, r1, #19, #10
+; V7A-NEXT: str r0, [r2]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c5_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r0, #0
+; V7A-T-NEXT: ubfx r1, r1, #19, #10
+; V7A-T-NEXT: strd r1, r0, [r2]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c5_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: lsls r1, r1, #3
+; V6M-NEXT: lsrs r1, r1, #22
+; V6M-NEXT: str r1, [r2]
+; V6M-NEXT: str r0, [r2, #4]
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i64 %arg, 51
+ %tmp1 = and i64 %tmp0, 1023
+ store i64 %tmp1, ptr %ptr
+ ret void
+}
+
+; Should still be fine, but the mask is wider (12 low bits, not shifted)
+define void @c6_i64(i64 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c6_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r0, #0
+; V7M-NEXT: ubfx r1, r1, #19, #12
+; V7M-NEXT: strd r1, r0, [r2]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c6_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r0, #0
+; V7A-NEXT: str r0, [r2, #4]
+; V7A-NEXT: ubfx r0, r1, #19, #12
+; V7A-NEXT: str r0, [r2]
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c6_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r0, #0
+; V7A-T-NEXT: ubfx r1, r1, #19, #12
+; V7A-T-NEXT: strd r1, r0, [r2]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c6_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: lsls r1, r1, #1
+; V6M-NEXT: lsrs r1, r1, #20
+; V6M-NEXT: str r1, [r2]
+; V6M-NEXT: str r0, [r2, #4]
+; V6M-NEXT: bx lr
+ %tmp0 = lshr i64 %arg, 51
+ %tmp1 = and i64 %tmp0, 4095
+ store i64 %tmp1, ptr %ptr
+ ret void
+}
+
+; Should be still fine, but the result is shifted left afterwards
+define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
+; V7M-LABEL: c7_i64:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r0, #0
+; V7M-NEXT: movw r3, #4092
+; V7M-NEXT: and.w r1, r3, r1, lsr #17
+; V7M-NEXT: strd r1, r0, [r2]
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: c7_i64:
+; V7A: @ %bb.0:
+; V7A-NEXT: movw r0, #4092
+; V7A-NEXT: mov r3, #0
+; V7A-NEXT: and r0, r0, r1, lsr #17
+; V7A-NEXT: stm r2, {r0, r3}
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: c7_i64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r0, #0
+; V7A-T-NEXT: movw r3, #4092
+; V7A-T-NEXT: and.w r1, r3, r1, lsr #17
+; V7A-T-NEXT: strd r1, r0, [r2]
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: c7_i64:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: lsrs r1, r1, #17
+; V6M-NEXT: ldr r3, .LCPI65_0
+; V6M-NEXT: ands r3, r1
+; V6M-NEXT: str r3, [r2]
+; V6M-NEXT: str r0, [r2, #4]
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI65_0:
+; V6M-NEXT: .long 4092 @ 0xffc
+ %tmp0 = lshr i64 %arg, 51
+ %tmp1 = and i64 %tmp0, 1023
+ %tmp2 = shl i64 %tmp1, 2
+ store i64 %tmp2, ptr %ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/ARM/extract-lowbits.ll b/llvm/test/CodeGen/ARM/extract-lowbits.ll
new file mode 100644
index 000000000000..373d998a0eeb
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/extract-lowbits.ll
@@ -0,0 +1,2752 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
+; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
+; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
+; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
+
+; Patterns:
+; a) x & ((1 << nbits) - 1)
+; b) x & ~(-1 << nbits)
+; c) x & (-1 >> (32 - nbits))
+; d) x << (32 - nbits) >> (32 - nbits)
+; are equivalent.
+
+; ---------------------------------------------------------------------------- ;
+; Pattern a. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_a0:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_a0:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r1, r3, r2, lsl r1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_a0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_a0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_a1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_a1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r1, r3, r2, lsl r1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_a1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_a1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %conv = zext i8 %numlowbits to i32
+ %onebit = shl i32 1, %conv
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_a2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_a2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r1, r3, r2, lsl r1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_a2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_a2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_a3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_a3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r1, r3, r2, lsl r1
+; V7A-NEXT: and r0, r1, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_a3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_a3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %conv = zext i8 %numlowbits to i32
+ %onebit = shl i32 1, %conv
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_a4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: movs r2, #1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: subs r1, #1
+; V7M-NEXT: ands r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_a4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: add r1, r3, r2, lsl r1
+; V7A-NEXT: and r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_a4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: subs r1, #1
+; V7A-T-NEXT: ands r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_a4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #1
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: subs r1, r2, #1
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %onebit = shl i32 1, %numlowbits
+ %mask = add nsw i32 %onebit, -1
+ %masked = and i32 %val, %mask ; swapped order
+ ret i32 %masked
+}
+
+; 64-bit
+
+define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: mov.w r12, #1
+; V7M-NEXT: subs.w lr, r2, #32
+; V7M-NEXT: lsl.w r2, r12, r2
+; V7M-NEXT: lsr.w r3, r12, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r12, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r2, #1
+; V7M-NEXT: sbc r3, r3, #0
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_a0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb r12, r2, #32
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsl r2, lr, r2
+; V7A-NEXT: lsr r12, lr, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: lslpl r12, lr, r3
+; V7A-NEXT: subs r2, r2, #1
+; V7A-NEXT: sbc r3, r12, #0
+; V7A-NEXT: and r0, r2, r0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: mov.w r12, #1
+; V7A-T-NEXT: subs.w lr, r2, #32
+; V7A-T-NEXT: lsl.w r2, r12, r2
+; V7A-T-NEXT: lsr.w r3, r12, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r12, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: subs r2, #1
+; V7A-T-NEXT: sbc r3, r3, #0
+; V7A-T-NEXT: ands r0, r2
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r6, #0
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: sbcs r1, r6
+; V6M-NEXT: ands r1, r5
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; Check that we don't throw away the vreg_width-1 mask if not using shifts
+define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a0_masked:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: and r2, r2, #63
+; V7M-NEXT: mov.w r12, #1
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: subs.w lr, r2, #32
+; V7M-NEXT: lsl.w r2, r12, r2
+; V7M-NEXT: lsr.w r3, r12, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r12, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r2, #1
+; V7M-NEXT: sbc r3, r3, #0
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_a0_masked:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: and r2, r2, #63
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: rsb r12, r2, #32
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsl r2, lr, r2
+; V7A-NEXT: lsr r12, lr, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: lslpl r12, lr, r3
+; V7A-NEXT: subs r2, r2, #1
+; V7A-NEXT: sbc r3, r12, #0
+; V7A-NEXT: and r0, r2, r0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a0_masked:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: and r2, r2, #63
+; V7A-T-NEXT: mov.w r12, #1
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: subs.w lr, r2, #32
+; V7A-T-NEXT: lsl.w r2, r12, r2
+; V7A-T-NEXT: lsr.w r3, r12, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r12, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: subs r2, #1
+; V7A-T-NEXT: sbc r3, r3, #0
+; V7A-T-NEXT: ands r0, r2
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a0_masked:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #63
+; V6M-NEXT: ands r2, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r6, #0
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: sbcs r1, r6
+; V6M-NEXT: ands r1, r5
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %numlowbits.masked = and i64 %numlowbits, 63
+ %onebit = shl i64 1, %numlowbits.masked
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: mov.w r12, #1
+; V7M-NEXT: subs.w lr, r2, #32
+; V7M-NEXT: lsl.w r2, r12, r2
+; V7M-NEXT: lsr.w r3, r12, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r12, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r2, #1
+; V7M-NEXT: sbc r3, r3, #0
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_a1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb r12, r2, #32
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsl r2, lr, r2
+; V7A-NEXT: lsr r12, lr, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: lslpl r12, lr, r3
+; V7A-NEXT: subs r2, r2, #1
+; V7A-NEXT: sbc r3, r12, #0
+; V7A-NEXT: and r0, r2, r0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: mov.w r12, #1
+; V7A-T-NEXT: subs.w lr, r2, #32
+; V7A-T-NEXT: lsl.w r2, r12, r2
+; V7A-T-NEXT: lsr.w r3, r12, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r12, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: subs r2, #1
+; V7A-T-NEXT: sbc r3, r3, #0
+; V7A-T-NEXT: ands r0, r2
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r6, #0
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: sbcs r1, r6
+; V6M-NEXT: ands r1, r5
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %conv = zext i8 %numlowbits to i64
+ %onebit = shl i64 1, %conv
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r2, #32
+; V7M-NEXT: movs r3, #1
+; V7M-NEXT: subs.w r12, r2, #32
+; V7M-NEXT: lsl.w r2, r3, r2
+; V7M-NEXT: lsr.w r1, r3, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r3, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r2, #1
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: sbc r1, r1, #0
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_a2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r6, r11, lr}
+; V7A-NEXT: push {r4, r6, r11, lr}
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: mov r1, #1
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: rsb r0, r2, #32
+; V7A-NEXT: subs r4, r2, #32
+; V7A-NEXT: lsr r0, r1, r0
+; V7A-NEXT: lslpl r0, r1, r4
+; V7A-NEXT: lsl r1, r1, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: subs r2, r1, #1
+; V7A-NEXT: sbc r0, r0, #0
+; V7A-NEXT: and r1, r0, r3
+; V7A-NEXT: and r0, r2, r6
+; V7A-NEXT: pop {r4, r6, r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: movs r1, #1
+; V7A-T-NEXT: ldrd r12, lr, [r0]
+; V7A-T-NEXT: subs.w r0, r2, #32
+; V7A-T-NEXT: lsr.w r3, r1, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r1, r0
+; V7A-T-NEXT: lsl.w r0, r1, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs r0, #1
+; V7A-T-NEXT: sbc r1, r3, #0
+; V7A-T-NEXT: and.w r0, r0, r12
+; V7A-T-NEXT: and.w r1, r1, lr
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r5, #0
+; V6M-NEXT: mov r1, r5
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r2, r0, #1
+; V6M-NEXT: sbcs r1, r5
+; V6M-NEXT: ldm r4!, {r0, r3}
+; V6M-NEXT: ands r1, r3
+; V6M-NEXT: ands r0, r2
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %val = load i64, ptr %w
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r2, r1, #32
+; V7M-NEXT: movs r3, #1
+; V7M-NEXT: subs.w r12, r1, #32
+; V7M-NEXT: lsl.w r1, r3, r1
+; V7M-NEXT: lsr.w r2, r3, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r3, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: subs r3, r1, #1
+; V7M-NEXT: sbc r1, r2, #0
+; V7M-NEXT: ldrd r0, r2, [r0]
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_a3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r6, r11, lr}
+; V7A-NEXT: push {r4, r6, r11, lr}
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: mov r2, #1
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: rsb r0, r1, #32
+; V7A-NEXT: subs r4, r1, #32
+; V7A-NEXT: lsl r1, r2, r1
+; V7A-NEXT: lsr r0, r2, r0
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: lslpl r0, r2, r4
+; V7A-NEXT: subs r2, r1, #1
+; V7A-NEXT: sbc r0, r0, #0
+; V7A-NEXT: and r1, r0, r3
+; V7A-NEXT: and r0, r2, r6
+; V7A-NEXT: pop {r4, r6, r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r1, #32
+; V7A-T-NEXT: movs r2, #1
+; V7A-T-NEXT: ldrd r12, lr, [r0]
+; V7A-T-NEXT: subs.w r0, r1, #32
+; V7A-T-NEXT: lsr.w r3, r2, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r2, r0
+; V7A-T-NEXT: lsl.w r0, r2, r1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: subs r0, #1
+; V7A-T-NEXT: sbc r1, r3, #0
+; V7A-T-NEXT: and.w r0, r0, r12
+; V7A-T-NEXT: and.w r1, r1, lr
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r2, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r5, #0
+; V6M-NEXT: mov r1, r5
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r2, r0, #1
+; V6M-NEXT: sbcs r1, r5
+; V6M-NEXT: ldm r4!, {r0, r3}
+; V6M-NEXT: ands r1, r3
+; V6M-NEXT: ands r0, r2
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %val = load i64, ptr %w
+ %conv = zext i8 %numlowbits to i64
+ %onebit = shl i64 1, %conv
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_a4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: mov.w r12, #1
+; V7M-NEXT: subs.w lr, r2, #32
+; V7M-NEXT: lsl.w r2, r12, r2
+; V7M-NEXT: lsr.w r3, r12, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r12, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: subs r2, #1
+; V7M-NEXT: sbc r3, r3, #0
+; V7M-NEXT: ands r0, r2
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_a4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb r12, r2, #32
+; V7A-NEXT: mov lr, #1
+; V7A-NEXT: subs r3, r2, #32
+; V7A-NEXT: lsl r2, lr, r2
+; V7A-NEXT: lsr r12, lr, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: lslpl r12, lr, r3
+; V7A-NEXT: subs r2, r2, #1
+; V7A-NEXT: sbc r3, r12, #0
+; V7A-NEXT: and r0, r0, r2
+; V7A-NEXT: and r1, r1, r3
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_a4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #32
+; V7A-T-NEXT: mov.w r12, #1
+; V7A-T-NEXT: subs.w lr, r2, #32
+; V7A-T-NEXT: lsl.w r2, r12, r2
+; V7A-T-NEXT: lsr.w r3, r12, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r3, r12, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: subs r2, #1
+; V7A-T-NEXT: sbc r3, r3, #0
+; V7A-T-NEXT: ands r0, r2
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_a4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r6, lr}
+; V6M-NEXT: push {r4, r5, r6, lr}
+; V6M-NEXT: mov r5, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #1
+; V6M-NEXT: movs r6, #0
+; V6M-NEXT: mov r1, r6
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: subs r0, r0, #1
+; V6M-NEXT: sbcs r1, r6
+; V6M-NEXT: ands r1, r5
+; V6M-NEXT: ands r0, r4
+; V6M-NEXT: pop {r4, r5, r6, pc}
+ %onebit = shl i64 1, %numlowbits
+ %mask = add nsw i64 %onebit, -1
+ %masked = and i64 %val, %mask ; swapped order
+ ret i64 %masked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern b. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_b0:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_b0:
+; V7A: @ %bb.0:
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: bic r0, r0, r2, lsl r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_b0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_b0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: bx lr
+ %notmask = shl i32 -1, %numlowbits
+ %mask = xor i32 %notmask, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_b1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_b1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: bic r0, r0, r2, lsl r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_b1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_b1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: bx lr
+ %conv = zext i8 %numlowbits to i32
+ %notmask = shl i32 -1, %conv
+ %mask = xor i32 %notmask, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_b2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_b2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: bic r0, r0, r2, lsl r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_b2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_b2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %notmask = shl i32 -1, %numlowbits
+ %mask = xor i32 %notmask, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_b3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_b3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: bic r0, r0, r2, lsl r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_b3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_b3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %conv = zext i8 %numlowbits to i32
+ %notmask = shl i32 -1, %conv
+ %mask = xor i32 %notmask, -1
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_b4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: lsl.w r1, r2, r1
+; V7M-NEXT: bics r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_b4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: bic r0, r0, r2, lsl r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_b4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: lsl.w r1, r2, r1
+; V7A-T-NEXT: bics r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_b4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #0
+; V6M-NEXT: mvns r2, r2
+; V6M-NEXT: lsls r2, r1
+; V6M-NEXT: bics r0, r2
+; V6M-NEXT: bx lr
+ %notmask = shl i32 -1, %numlowbits
+ %mask = xor i32 %notmask, -1
+ %masked = and i32 %val, %mask ; swapped order
+ ret i32 %masked
+}
+
+; 64-bit
+
+define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_b0:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r12, r3, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r2
+; V7M-NEXT: bic.w r0, r0, r12
+; V7M-NEXT: bics r1, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_b0:
+; V7A: @ %bb.0:
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_b0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r12, r3, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r2
+; V7A-T-NEXT: bic.w r0, r0, r12
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_b0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r5, r0
+; V6M-NEXT: bics r4, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %notmask = shl i64 -1, %numlowbits
+ %mask = xor i64 %notmask, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern b with an i8 bit count: the count is zero-extended to i64,
+; the mask is built as ~(-1 << count), and the result is mask & %val.
+define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_b1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r12, r3, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r2
+; V7M-NEXT: bic.w r0, r0, r12
+; V7M-NEXT: bics r1, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_b1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_b1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r12, r3, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r2
+; V7A-T-NEXT: bic.w r0, r0, r12
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_b1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r5, r0
+; V6M-NEXT: bics r4, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %conv = zext i8 %numlowbits to i64
+ %notmask = shl i64 -1, %conv
+ %mask = xor i64 %notmask, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern b where the value to be masked is loaded from %w:
+; result = ~(-1 << %numlowbits) & load(%w).
+define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_b2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r1, #-1
+; V7M-NEXT: subs.w r12, r2, #32
+; V7M-NEXT: lsl.w r3, r1, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: ldrd r0, r2, [r0]
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r1, r12
+; V7M-NEXT: bics r0, r3
+; V7M-NEXT: bic.w r1, r2, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_b2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, lr}
+; V7A-NEXT: push {r4, lr}
+; V7A-NEXT: ldr r4, [r0]
+; V7A-NEXT: mvn r1, #0
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: subs r0, r2, #32
+; V7A-NEXT: lsl r2, r1, r2
+; V7A-NEXT: lslpl r1, r1, r0
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r1, r3, r1
+; V7A-NEXT: bic r0, r4, r2
+; V7A-NEXT: pop {r4, pc}
+;
+; V7A-T-LABEL: bzhi64_b2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r1, #-1
+; V7A-T-NEXT: ldrd r0, r12, [r0]
+; V7A-T-NEXT: lsl.w r3, r1, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r1, r2
+; V7A-T-NEXT: bics r0, r3
+; V7A-T-NEXT: bic.w r1, r12, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_b2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: ldm r4!, {r2, r3}
+; V6M-NEXT: bics r2, r0
+; V6M-NEXT: bics r3, r1
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: mov r1, r3
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %notmask = shl i64 -1, %numlowbits
+ %mask = xor i64 %notmask, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern b combining both variations: the value is loaded from %w and
+; the i8 bit count is zero-extended to i64 before the shl that forms the mask.
+define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_b3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r2, #-1
+; V7M-NEXT: subs.w r12, r1, #32
+; V7M-NEXT: lsl.w r3, r2, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r3, #0
+; V7M-NEXT: ldrd r0, r1, [r0]
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r2, r12
+; V7M-NEXT: bics r1, r2
+; V7M-NEXT: bics r0, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_b3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r6, r11, lr}
+; V7A-NEXT: push {r4, r6, r11, lr}
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: subs r0, r1, #32
+; V7A-NEXT: lsl r4, r2, r1
+; V7A-NEXT: lslpl r2, r2, r0
+; V7A-NEXT: movwpl r4, #0
+; V7A-NEXT: bic r1, r3, r2
+; V7A-NEXT: bic r0, r6, r4
+; V7A-NEXT: pop {r4, r6, r11, pc}
+;
+; V7A-T-LABEL: bzhi64_b3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: ldrd r0, r12, [r0]
+; V7A-T-NEXT: lsl.w r3, r2, r1
+; V7A-T-NEXT: subs r1, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r3, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r2, r1
+; V7A-T-NEXT: bics r0, r3
+; V7A-T-NEXT: bic.w r1, r12, r2
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_b3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: mov r2, r1
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: ldm r4!, {r2, r3}
+; V6M-NEXT: bics r2, r0
+; V6M-NEXT: bics r3, r1
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: mov r1, r3
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %conv = zext i8 %numlowbits to i64
+ %notmask = shl i64 -1, %conv
+ %mask = xor i64 %notmask, -1
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern b with the operands of the final 'and' written in the
+; opposite order (%val first, mask second); the mask is still ~(-1 << count).
+define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_b4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r12, r3, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r2
+; V7M-NEXT: bic.w r0, r0, r12
+; V7M-NEXT: bics r1, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_b4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: bic r1, r1, r3
+; V7A-NEXT: bic r0, r0, r2
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_b4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r12, r3, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r2
+; V7A-T-NEXT: bic.w r0, r0, r12
+; V7A-T-NEXT: bics r1, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_b4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: bics r5, r0
+; V6M-NEXT: bics r4, r1
+; V6M-NEXT: mov r0, r5
+; V6M-NEXT: mov r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %notmask = shl i64 -1, %numlowbits
+ %mask = xor i64 %notmask, -1
+ %masked = and i64 %val, %mask ; swapped order
+ ret i64 %masked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern c: val & (-1 >> (bitwidth - numlowbits)), using lshr. 32-bit
+; ---------------------------------------------------------------------------- ;
+
+; Base 32-bit pattern c: the mask is all-ones logically shifted right by
+; (32 - %numlowbits), and the result is mask & %val.
+define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_c0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_c0:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_c0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_c0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+; 32-bit pattern c with an i8 bit count: (32 - %numlowbits) is computed in i8
+; and only then zero-extended to i32 for use as the lshr amount.
+define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_c1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_c1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_c1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_c1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %mask = lshr i32 -1, %sh_prom
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+; 32-bit pattern c where the value to be masked is loaded from %w:
+; result = (-1 >> (32 - %numlowbits)) & load(%w).
+define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_c2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_c2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_c2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_c2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+; 32-bit pattern c combining both variations: the value is loaded from %w and
+; the shift amount (32 - %numlowbits) is computed in i8, then zero-extended.
+define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_c3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_c3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_c3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_c3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %mask = lshr i32 -1, %sh_prom
+ %masked = and i32 %mask, %val
+ ret i32 %masked
+}
+
+; 32-bit pattern c with the operands of the final 'and' written in the
+; opposite order (%val first, mask second).
+define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_c4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_c4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_c4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_c4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %numhighbits = sub i32 32, %numlowbits
+ %mask = lshr i32 -1, %numhighbits
+ %masked = and i32 %val, %mask ; swapped order
+ ret i32 %masked
+}
+
+; 64-bit
+
+; Base 64-bit pattern c: the mask is all-ones logically shifted right by
+; (64 - %numlowbits), and the result is mask & %val.
+define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_c0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsbs.w lr, r2, #32
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: mov.w r12, #-1
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsr.w r2, r12, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r3, r3, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_c0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsbs lr, r2, #32
+; V7A-NEXT: rsb r2, r2, #64
+; V7A-NEXT: mvn r12, #0
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r2, r12, r2
+; V7A-NEXT: lsrpl r3, r3, lr
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: and r1, r2, r1
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_c0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsbs.w lr, r2, #32
+; V7A-T-NEXT: rsb.w r2, r2, #64
+; V7A-T-NEXT: mov.w r12, #-1
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsr.w r2, r12, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r3, r3, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: ands r0, r3
+; V7A-T-NEXT: ands r1, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_c0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r2, r0, r2
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern c with an i8 bit count: (64 - %numlowbits) is computed in i8
+; and only then zero-extended to i64 for use as the lshr amount.
+define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_c1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: uxtb r2, r2
+; V7M-NEXT: subs.w r12, r2, #32
+; V7M-NEXT: lsr.w r2, r3, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r3, r3, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_c1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb lr, r2, #64
+; V7A-NEXT: mvn r2, #31
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: uxtb r12, lr
+; V7A-NEXT: uxtab r2, r2, lr
+; V7A-NEXT: lsr r12, r3, r12
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: movwpl r12, #0
+; V7A-NEXT: lsrpl r3, r3, r2
+; V7A-NEXT: and r1, r12, r1
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_c1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w lr, r2, #64
+; V7A-T-NEXT: mvn r2, #31
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: uxtb.w r12, lr
+; V7A-T-NEXT: uxtab r2, r2, lr
+; V7A-T-NEXT: lsr.w r12, r3, r12
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r3, r2
+; V7A-T-NEXT: and.w r1, r1, r12
+; V7A-T-NEXT: ands r0, r3
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_c1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r0, r0, r2
+; V6M-NEXT: uxtb r2, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %mask = lshr i64 -1, %sh_prom
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern c where the value to be masked is loaded from %w:
+; result = (-1 >> (64 - %numlowbits)) & load(%w).
+define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_c2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsbs.w r1, r2, #32
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl r3, r1
+; V7M-NEXT: ldrd r0, r1, [r0]
+; V7M-NEXT: mov.w r12, #-1
+; V7M-NEXT: lsr.w r2, r12, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_c2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r5, lr}
+; V7A-NEXT: push {r5, lr}
+; V7A-NEXT: rsbs r1, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: mvn r12, #0
+; V7A-NEXT: ldm r0, {r0, r5}
+; V7A-NEXT: lsrpl r3, r3, r1
+; V7A-NEXT: rsb r1, r2, #64
+; V7A-NEXT: and r0, r3, r0
+; V7A-NEXT: lsr r1, r12, r1
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: and r1, r1, r5
+; V7A-NEXT: pop {r5, pc}
+;
+; V7A-T-LABEL: bzhi64_c2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsbs.w r1, r2, #32
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: ldrd r0, lr, [r0]
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r3, r1
+; V7A-T-NEXT: rsb.w r1, r2, #64
+; V7A-T-NEXT: mov.w r12, #-1
+; V7A-T-NEXT: and.w r0, r0, r3
+; V7A-T-NEXT: lsr.w r1, r12, r1
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: and.w r1, r1, lr
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_c2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r2, r0, r2
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldm r4!, {r2, r3}
+; V6M-NEXT: ands r0, r2
+; V6M-NEXT: ands r1, r3
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern c combining both variations: the value is loaded from %w and
+; the shift amount (64 - %numlowbits) is computed in i8, then zero-extended.
+define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_c3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #64
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: subs.w r2, r1, #32
+; V7M-NEXT: lsr.w r1, r3, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl r3, r2
+; V7M-NEXT: ldrd r0, r2, [r0]
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_c3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r4, r6, r11, lr}
+; V7A-NEXT: push {r4, r6, r11, lr}
+; V7A-NEXT: rsb r1, r1, #64
+; V7A-NEXT: mvn r4, #31
+; V7A-NEXT: mvn r2, #0
+; V7A-NEXT: ldr r6, [r0]
+; V7A-NEXT: ldr r3, [r0, #4]
+; V7A-NEXT: uxtb r0, r1
+; V7A-NEXT: uxtab r4, r4, r1
+; V7A-NEXT: lsr r0, r2, r0
+; V7A-NEXT: cmp r4, #0
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: and r1, r0, r3
+; V7A-NEXT: lsrpl r2, r2, r4
+; V7A-NEXT: and r0, r2, r6
+; V7A-NEXT: pop {r4, r6, r11, pc}
+;
+; V7A-T-LABEL: bzhi64_c3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r1, r1, #64
+; V7A-T-NEXT: mvn r3, #31
+; V7A-T-NEXT: ldrd r12, lr, [r0]
+; V7A-T-NEXT: mov.w r2, #-1
+; V7A-T-NEXT: uxtb r0, r1
+; V7A-T-NEXT: uxtab r3, r3, r1
+; V7A-T-NEXT: lsr.w r0, r2, r0
+; V7A-T-NEXT: cmp r3, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: and.w r1, r0, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl r2, r3
+; V7A-T-NEXT: and.w r0, r2, r12
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_c3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: mov r4, r0
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r0, r0, r1
+; V6M-NEXT: uxtb r2, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ldm r4!, {r2, r3}
+; V6M-NEXT: ands r0, r2
+; V6M-NEXT: ands r1, r3
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %mask = lshr i64 -1, %sh_prom
+ %masked = and i64 %mask, %val
+ ret i64 %masked
+}
+
+; 64-bit pattern c with the operands of the final 'and' written in the
+; opposite order (%val first, mask second).
+define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_c4_commutative:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsbs.w lr, r2, #32
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: mov.w r12, #-1
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsr.w r2, r12, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r3, r3, lr
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r2, #0
+; V7M-NEXT: ands r0, r3
+; V7M-NEXT: ands r1, r2
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_c4_commutative:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsbs lr, r2, #32
+; V7A-NEXT: rsb r2, r2, #64
+; V7A-NEXT: mvn r12, #0
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsr r2, r12, r2
+; V7A-NEXT: lsrpl r3, r3, lr
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r0, r0, r3
+; V7A-NEXT: and r1, r1, r2
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_c4_commutative:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsbs.w lr, r2, #32
+; V7A-T-NEXT: rsb.w r2, r2, #64
+; V7A-T-NEXT: mov.w r12, #-1
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsr.w r2, r12, r2
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r3, r3, lr
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r2, #0
+; V7A-T-NEXT: ands r0, r3
+; V7A-T-NEXT: ands r1, r2
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_c4_commutative:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #64
+; V6M-NEXT: subs r2, r0, r2
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %numhighbits = sub i64 64, %numlowbits
+ %mask = lshr i64 -1, %numhighbits
+ %masked = and i64 %val, %mask ; swapped order
+ ret i64 %masked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Pattern d: (val << (bitwidth - numlowbits)) >> (bitwidth - numlowbits), using lshr. 32-bit.
+; ---------------------------------------------------------------------------- ;
+
+; Base 32-bit pattern d: no explicit mask; %val is shifted left and then
+; logically shifted right by the same amount, (32 - %numlowbits).
+define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_d0:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_d0:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_d0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_d0:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %numhighbits = sub i32 32, %numlowbits
+ %highbitscleared = shl i32 %val, %numhighbits
+ %masked = lshr i32 %highbitscleared, %numhighbits
+ ret i32 %masked
+}
+
+; 32-bit pattern d with an i8 bit count: (32 - %numlowbits) is computed in i8,
+; zero-extended to i32, and used as the amount for both the shl and the lshr.
+define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_d1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_d1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_d1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_d1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %highbitscleared = shl i32 %val, %sh_prom
+ %masked = lshr i32 %highbitscleared, %sh_prom
+ ret i32 %masked
+}
+
+; 32-bit pattern d where the value is loaded from %w before being shifted
+; left and then logically right by (32 - %numlowbits).
+define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_d2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_d2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_d2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_d2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %numhighbits = sub i32 32, %numlowbits
+ %highbitscleared = shl i32 %val, %numhighbits
+ %masked = lshr i32 %highbitscleared, %numhighbits
+ ret i32 %masked
+}
+
+; 32-bit pattern d combining both variations: the value is loaded from %w and
+; the shift amount (32 - %numlowbits) is computed in i8, then zero-extended.
+define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi32_d3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #32
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_d3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: rsb r1, r1, #32
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: uxtb r1, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_d3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: rsb.w r1, r1, #32
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: uxtb r1, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_d3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #32
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: uxtb r1, r1
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: bx lr
+ %val = load i32, ptr %w
+ %numhighbits = sub i8 32, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i32
+ %highbitscleared = shl i32 %val, %sh_prom
+ %masked = lshr i32 %highbitscleared, %sh_prom
+ ret i32 %masked
+}
+
+; 64-bit.
+
+; Base 64-bit pattern d: no explicit mask; %val is shifted left and then
+; logically shifted right by the same amount, (64 - %numlowbits).
+define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_d0:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r3, r2, #64
+; V7M-NEXT: rsbs.w r2, r2, #32
+; V7M-NEXT: rsb.w lr, r3, #32
+; V7M-NEXT: lsl.w r12, r1, r3
+; V7M-NEXT: lsr.w r1, r0, lr
+; V7M-NEXT: orr.w r1, r1, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, r2
+; V7M-NEXT: lsl.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r12, r1, lr
+; V7M-NEXT: lsr.w r0, r0, r3
+; V7M-NEXT: orr.w r0, r0, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r2
+; V7M-NEXT: lsr.w r1, r1, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_d0:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb lr, r2, #64
+; V7A-NEXT: rsbs r2, r2, #32
+; V7A-NEXT: rsb r12, lr, #32
+; V7A-NEXT: lsr r3, r0, r12
+; V7A-NEXT: orr r1, r3, r1, lsl lr
+; V7A-NEXT: lslpl r1, r0, r2
+; V7A-NEXT: lsl r0, r0, lr
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, lr
+; V7A-NEXT: orr r0, r0, r1, lsl r12
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: lsr r1, r1, lr
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_d0:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #64
+; V7A-T-NEXT: rsbs.w r2, r2, #32
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: lsl.w r12, r1, r3
+; V7A-T-NEXT: lsr.w r1, r0, lr
+; V7A-T-NEXT: orr.w r1, r1, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r1, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r12, r1, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: lsr.w r1, r1, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_d0:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: movs r3, #64
+; V6M-NEXT: subs r4, r3, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %numhighbits = sub i64 64, %numlowbits
+ %highbitscleared = shl i64 %val, %numhighbits
+ %masked = lshr i64 %highbitscleared, %numhighbits
+ ret i64 %masked
+}
+
+; 64-bit pattern d with an i8 bit count: (64 - %numlowbits) is computed in i8,
+; zero-extended to i64, and used as the amount for both the shl and the lshr.
+define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_d1_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r2, r2, #64
+; V7M-NEXT: uxtb r2, r2
+; V7M-NEXT: rsb.w r3, r2, #32
+; V7M-NEXT: lsl.w r12, r1, r2
+; V7M-NEXT: lsr.w r1, r0, r3
+; V7M-NEXT: orr.w r1, r1, r12
+; V7M-NEXT: subs.w r12, r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r1, r0, r12
+; V7M-NEXT: lsl.w r0, r0, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r3, r1, r3
+; V7M-NEXT: lsr.w r0, r0, r2
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r1, r12
+; V7M-NEXT: lsr.w r1, r1, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_d1_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r11, lr}
+; V7A-NEXT: push {r11, lr}
+; V7A-NEXT: rsb lr, r2, #64
+; V7A-NEXT: uxtb r3, lr
+; V7A-NEXT: rsb r12, r3, #32
+; V7A-NEXT: lsr r2, r0, r12
+; V7A-NEXT: orr r1, r2, r1, lsl r3
+; V7A-NEXT: mvn r2, #31
+; V7A-NEXT: uxtab r2, r2, lr
+; V7A-NEXT: cmp r2, #0
+; V7A-NEXT: lslpl r1, r0, r2
+; V7A-NEXT: lsl r0, r0, r3
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r3
+; V7A-NEXT: orr r0, r0, r1, lsl r12
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: lsr r1, r1, r3
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r11, pc}
+;
+; V7A-T-LABEL: bzhi64_d1_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r4, r2, #64
+; V7A-T-NEXT: mvn r2, #31
+; V7A-T-NEXT: uxtb r3, r4
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: lsl.w r12, r1, r3
+; V7A-T-NEXT: uxtab r2, r2, r4
+; V7A-T-NEXT: lsr.w r1, r0, lr
+; V7A-T-NEXT: cmp r2, #0
+; V7A-T-NEXT: orr.w r1, r1, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r1, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r4, r1, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: lsr.w r1, r1, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bzhi64_d1_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: movs r3, #64
+; V6M-NEXT: subs r2, r3, r2
+; V6M-NEXT: uxtb r4, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %highbitscleared = shl i64 %val, %sh_prom
+ %masked = lshr i64 %highbitscleared, %sh_prom
+ ret i64 %masked
+}
+
+; 64-bit pattern d where the value is loaded from %w before being shifted
+; left and then logically right by (64 - %numlowbits).
+define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_d2_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: .save {r7, lr}
+; V7M-NEXT: push {r7, lr}
+; V7M-NEXT: rsb.w r1, r2, #64
+; V7M-NEXT: ldrd r0, r3, [r0]
+; V7M-NEXT: rsb.w lr, r1, #32
+; V7M-NEXT: rsbs.w r2, r2, #32
+; V7M-NEXT: lsl.w r12, r3, r1
+; V7M-NEXT: lsr.w r3, r0, lr
+; V7M-NEXT: orr.w r3, r3, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r3, r0, r2
+; V7M-NEXT: lsl.w r0, r0, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r12, r3, lr
+; V7M-NEXT: lsr.w r0, r0, r1
+; V7M-NEXT: lsr.w r1, r3, r1
+; V7M-NEXT: orr.w r0, r0, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r3, r2
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: pop {r7, pc}
+;
+; V7A-LABEL: bzhi64_d2_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r5, lr}
+; V7A-NEXT: push {r5, lr}
+; V7A-NEXT: rsb r3, r2, #64
+; V7A-NEXT: ldm r0, {r0, r5}
+; V7A-NEXT: rsb r12, r3, #32
+; V7A-NEXT: rsbs r2, r2, #32
+; V7A-NEXT: lsr r1, r0, r12
+; V7A-NEXT: orr r1, r1, r5, lsl r3
+; V7A-NEXT: lslpl r1, r0, r2
+; V7A-NEXT: lsl r0, r0, r3
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r3
+; V7A-NEXT: orr r0, r0, r1, lsl r12
+; V7A-NEXT: lsrpl r0, r1, r2
+; V7A-NEXT: lsr r1, r1, r3
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r5, pc}
+;
+; V7A-T-LABEL: bzhi64_d2_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r7, lr}
+; V7A-T-NEXT: push {r7, lr}
+; V7A-T-NEXT: rsb.w r3, r2, #64
+; V7A-T-NEXT: ldrd r0, r1, [r0]
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: rsbs.w r2, r2, #32
+; V7A-T-NEXT: lsl.w r12, r1, r3
+; V7A-T-NEXT: lsr.w r1, r0, lr
+; V7A-T-NEXT: orr.w r1, r1, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r1, r0, r2
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r12, r1, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r1, r2
+; V7A-T-NEXT: lsr.w r1, r1, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r7, pc}
+;
+; V6M-LABEL: bzhi64_d2_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: movs r1, #64
+; V6M-NEXT: subs r4, r1, r2
+; V6M-NEXT: ldr r2, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %numhighbits = sub i64 64, %numlowbits
+ %highbitscleared = shl i64 %val, %numhighbits
+ %masked = lshr i64 %highbitscleared, %numhighbits
+ ret i64 %masked
+}
+
+define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
+; V7M-LABEL: bzhi64_d3_load_indexzext:
+; V7M: @ %bb.0:
+; V7M-NEXT: rsb.w r1, r1, #64
+; V7M-NEXT: ldrd r0, r2, [r0]
+; V7M-NEXT: uxtb r1, r1
+; V7M-NEXT: rsb.w r3, r1, #32
+; V7M-NEXT: lsl.w r12, r2, r1
+; V7M-NEXT: lsr.w r2, r0, r3
+; V7M-NEXT: orr.w r2, r2, r12
+; V7M-NEXT: subs.w r12, r1, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl.w r2, r0, r12
+; V7M-NEXT: lsl.w r0, r0, r1
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r0, #0
+; V7M-NEXT: lsl.w r3, r2, r3
+; V7M-NEXT: lsr.w r0, r0, r1
+; V7M-NEXT: lsr.w r1, r2, r1
+; V7M-NEXT: orr.w r0, r0, r3
+; V7M-NEXT: it pl
+; V7M-NEXT: lsrpl.w r0, r2, r12
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_d3_load_indexzext:
+; V7A: @ %bb.0:
+; V7A-NEXT: .save {r5, lr}
+; V7A-NEXT: push {r5, lr}
+; V7A-NEXT: rsb r1, r1, #64
+; V7A-NEXT: ldm r0, {r0, r5}
+; V7A-NEXT: uxtb r2, r1
+; V7A-NEXT: rsb r12, r2, #32
+; V7A-NEXT: lsr r3, r0, r12
+; V7A-NEXT: orr r3, r3, r5, lsl r2
+; V7A-NEXT: mvn r5, #31
+; V7A-NEXT: uxtab r1, r5, r1
+; V7A-NEXT: cmp r1, #0
+; V7A-NEXT: lslpl r3, r0, r1
+; V7A-NEXT: lsl r0, r0, r2
+; V7A-NEXT: movwpl r0, #0
+; V7A-NEXT: lsr r0, r0, r2
+; V7A-NEXT: orr r0, r0, r3, lsl r12
+; V7A-NEXT: lsrpl r0, r3, r1
+; V7A-NEXT: lsr r1, r3, r2
+; V7A-NEXT: movwpl r1, #0
+; V7A-NEXT: pop {r5, pc}
+;
+; V7A-T-LABEL: bzhi64_d3_load_indexzext:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: .save {r4, lr}
+; V7A-T-NEXT: push {r4, lr}
+; V7A-T-NEXT: rsb.w r4, r1, #64
+; V7A-T-NEXT: ldrd r0, r2, [r0]
+; V7A-T-NEXT: mvn r1, #31
+; V7A-T-NEXT: uxtb r3, r4
+; V7A-T-NEXT: rsb.w lr, r3, #32
+; V7A-T-NEXT: lsl.w r12, r2, r3
+; V7A-T-NEXT: uxtab r1, r1, r4
+; V7A-T-NEXT: lsr.w r2, r0, lr
+; V7A-T-NEXT: cmp r1, #0
+; V7A-T-NEXT: orr.w r2, r2, r12
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl.w r2, r0, r1
+; V7A-T-NEXT: lsl.w r0, r0, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r0, #0
+; V7A-T-NEXT: lsl.w r4, r2, lr
+; V7A-T-NEXT: lsr.w r0, r0, r3
+; V7A-T-NEXT: orr.w r0, r0, r4
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lsrpl.w r0, r2, r1
+; V7A-T-NEXT: lsr.w r1, r2, r3
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl r1, #0
+; V7A-T-NEXT: pop {r4, pc}
+;
+; V6M-LABEL: bzhi64_d3_load_indexzext:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, lr}
+; V6M-NEXT: push {r4, lr}
+; V6M-NEXT: movs r2, #64
+; V6M-NEXT: subs r1, r2, r1
+; V6M-NEXT: uxtb r4, r1
+; V6M-NEXT: ldr r2, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: mov r2, r4
+; V6M-NEXT: bl __aeabi_llsr
+; V6M-NEXT: pop {r4, pc}
+ %val = load i64, ptr %w
+ %numhighbits = sub i8 64, %numlowbits
+ %sh_prom = zext i8 %numhighbits to i64
+ %highbitscleared = shl i64 %val, %sh_prom
+ %masked = lshr i64 %highbitscleared, %sh_prom
+ ret i64 %masked
+}
+
+; ---------------------------------------------------------------------------- ;
+; Constant mask
+; ---------------------------------------------------------------------------- ;
+
+; 32-bit
+
+define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask32:
+; V7M: @ %bb.0:
+; V7M-NEXT: bic r0, r0, #-2147483648
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask32:
+; V7A: @ %bb.0:
+; V7A-NEXT: bic r0, r0, #-2147483648
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: bic r0, r0, #-2147483648
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask32:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r1, #31
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: bx lr
+ %masked = and i32 %val, 2147483647
+ ret i32 %masked
+}
+
+define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask32_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: bic r0, r0, #-2147483648
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask32_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: bic r0, r0, #-2147483648
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask32_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: bic r0, r0, #-2147483648
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask32_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r1, #31
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: bx lr
+ %val1 = load i32, ptr %val
+ %masked = and i32 %val1, 2147483647
+ ret i32 %masked
+}
+
+define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask16:
+; V7M: @ %bb.0:
+; V7M-NEXT: bfc r0, #15, #17
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask16:
+; V7A: @ %bb.0:
+; V7A-NEXT: bfc r0, #15, #17
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask16:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: bfc r0, #15, #17
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask16:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, .LCPI41_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI41_0:
+; V6M-NEXT: .long 32767 @ 0x7fff
+ %masked = and i32 %val, 32767
+ ret i32 %masked
+}
+
+define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask16_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: bfc r0, #15, #17
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask16_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: bfc r0, #15, #17
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask16_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: bfc r0, #15, #17
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask16_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, [r0]
+; V6M-NEXT: ldr r0, .LCPI42_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI42_0:
+; V6M-NEXT: .long 32767 @ 0x7fff
+ %val1 = load i32, ptr %val
+ %masked = and i32 %val1, 32767
+ ret i32 %masked
+}
+
+define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask8:
+; V7M: @ %bb.0:
+; V7M-NEXT: and r0, r0, #127
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask8:
+; V7A: @ %bb.0:
+; V7A-NEXT: and r0, r0, #127
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask8:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: and r0, r0, #127
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask8:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #127
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %masked = and i32 %val, 127
+ ret i32 %masked
+}
+
+define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi32_constant_mask8_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: and r0, r0, #127
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi32_constant_mask8_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: and r0, r0, #127
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi32_constant_mask8_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: and r0, r0, #127
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi32_constant_mask8_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, [r0]
+; V6M-NEXT: movs r0, #127
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: bx lr
+ %val1 = load i32, ptr %val
+ %masked = and i32 %val1, 127
+ ret i32 %masked
+}
+
+; 64-bit
+
+define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask64:
+; V7M: @ %bb.0:
+; V7M-NEXT: bic r1, r1, #-1073741824
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask64:
+; V7A: @ %bb.0:
+; V7A-NEXT: bic r1, r1, #-1073741824
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: bic r1, r1, #-1073741824
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask64:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r2, #3
+; V6M-NEXT: lsls r2, r2, #30
+; V6M-NEXT: bics r1, r2
+; V6M-NEXT: bx lr
+ %masked = and i64 %val, 4611686018427387903
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask64_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldrd r0, r1, [r0]
+; V7M-NEXT: bic r1, r1, #-1073741824
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask64_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldrd r0, r1, [r0]
+; V7A-NEXT: bic r1, r1, #-1073741824
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask64_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldrd r0, r1, [r0]
+; V7A-T-NEXT: bic r1, r1, #-1073741824
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask64_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #3
+; V6M-NEXT: lsls r3, r1, #30
+; V6M-NEXT: ldr r2, [r0]
+; V6M-NEXT: ldr r1, [r0, #4]
+; V6M-NEXT: bics r1, r3
+; V6M-NEXT: mov r0, r2
+; V6M-NEXT: bx lr
+ %val1 = load i64, ptr %val
+ %masked = and i64 %val1, 4611686018427387903
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask32:
+; V7M: @ %bb.0:
+; V7M-NEXT: bic r0, r0, #-2147483648
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask32:
+; V7A: @ %bb.0:
+; V7A-NEXT: bic r0, r0, #-2147483648
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask32:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: bic r0, r0, #-2147483648
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask32:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r1, #31
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %masked = and i64 %val, 2147483647
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask32_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bic r0, r0, #-2147483648
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask32_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bic r0, r0, #-2147483648
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask32_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bic r0, r0, #-2147483648
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask32_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #1
+; V6M-NEXT: lsls r1, r1, #31
+; V6M-NEXT: ldr r0, [r0]
+; V6M-NEXT: bics r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %val1 = load i64, ptr %val
+ %masked = and i64 %val1, 2147483647
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask16:
+; V7M: @ %bb.0:
+; V7M-NEXT: bfc r0, #15, #17
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask16:
+; V7A: @ %bb.0:
+; V7A-NEXT: bfc r0, #15, #17
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask16:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: bfc r0, #15, #17
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask16:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, .LCPI49_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI49_0:
+; V6M-NEXT: .long 32767 @ 0x7fff
+ %masked = and i64 %val, 32767
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask16_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bfc r0, #15, #17
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask16_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bfc r0, #15, #17
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask16_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bfc r0, #15, #17
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask16_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, [r0]
+; V6M-NEXT: ldr r0, .LCPI50_0
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+; V6M-NEXT: .p2align 2
+; V6M-NEXT: @ %bb.1:
+; V6M-NEXT: .LCPI50_0:
+; V6M-NEXT: .long 32767 @ 0x7fff
+ %val1 = load i64, ptr %val
+ %masked = and i64 %val1, 32767
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask8:
+; V7M: @ %bb.0:
+; V7M-NEXT: and r0, r0, #127
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask8:
+; V7A: @ %bb.0:
+; V7A-NEXT: and r0, r0, #127
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask8:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: and r0, r0, #127
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask8:
+; V6M: @ %bb.0:
+; V6M-NEXT: movs r1, #127
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %masked = and i64 %val, 127
+ ret i64 %masked
+}
+
+define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
+; V7M-LABEL: bzhi64_constant_mask8_load:
+; V7M: @ %bb.0:
+; V7M-NEXT: ldr r0, [r0]
+; V7M-NEXT: movs r1, #0
+; V7M-NEXT: and r0, r0, #127
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: bzhi64_constant_mask8_load:
+; V7A: @ %bb.0:
+; V7A-NEXT: ldr r0, [r0]
+; V7A-NEXT: mov r1, #0
+; V7A-NEXT: and r0, r0, #127
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: bzhi64_constant_mask8_load:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: ldr r0, [r0]
+; V7A-T-NEXT: movs r1, #0
+; V7A-T-NEXT: and r0, r0, #127
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: bzhi64_constant_mask8_load:
+; V6M: @ %bb.0:
+; V6M-NEXT: ldr r1, [r0]
+; V6M-NEXT: movs r0, #127
+; V6M-NEXT: ands r0, r1
+; V6M-NEXT: movs r1, #0
+; V6M-NEXT: bx lr
+ %val1 = load i64, ptr %val
+ %masked = and i64 %val1, 127
+ ret i64 %masked
+}
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
index 8e671c903add..f5b2e98b62fd 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
@@ -81,7 +81,7 @@ body: |
STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8)
dead renamable $r0 = SPACE 8920, undef renamable $r0
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1350, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
index 03ddd80ed0ea..4b6647683139 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
@@ -72,7 +72,7 @@ body: |
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool)
dead renamable $r0 = SPACE 1230, undef renamable $r0
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1330, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
index 46f028bd492d..c16a62a8a989 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
@@ -89,7 +89,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
index 5a03fcdb7fdf..049b7d9b4613 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
@@ -95,7 +95,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 800ee87b95ca..8230e47259dd 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -1572,26 +1572,11 @@ define void @test_fma(ptr %p, ptr %q, ptr %r) #0 {
}
define void @test_fabs(ptr %p) {
-; CHECK-FP16-LABEL: test_fabs:
-; CHECK-FP16: ldrh r1, [r0]
-; CHECK-FP16-NEXT: vmov s0, r1
-; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-FP16-NEXT: vabs.f32 s0, s0
-; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0
-; CHECK-FP16-NEXT: vmov r1, s0
-; CHECK-FP16-NEXT: strh r1, [r0]
-; CHECK-FP16-NEXT: bx lr
-;
-; CHECK-LIBCALL-LABEL: test_fabs:
-; CHECK-LIBCALL: .save {r4, lr}
-; CHECK-LIBCALL-NEXT: push {r4, lr}
-; CHECK-LIBCALL-NEXT: mov r4, r0
-; CHECK-LIBCALL-NEXT: ldrh r0, [r0]
-; CHECK-LIBCALL-NEXT: bl __aeabi_h2f
-; CHECK-LIBCALL-NEXT: bic r0, r0, #-2147483648
-; CHECK-LIBCALL-NEXT: bl __aeabi_f2h
-; CHECK-LIBCALL-NEXT: strh r0, [r4]
-; CHECK-LIBCALL-NEXT: pop {r4, pc}
+; CHECK-ALL-LABEL: test_fabs:
+; CHECK-ALL: ldrh r1, [r0]
+; CHECK-ALL-NEXT: bfc r1, #15, #17
+; CHECK-ALL-NEXT: strh r1, [r0]
+; CHECK-ALL-NEXT: bx lr
%a = load half, ptr %p, align 2
%r = call half @llvm.fabs.f16(half %a)
store half %r, ptr %p
@@ -2454,26 +2439,11 @@ define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
}
define void @test_fneg(ptr %p1, ptr %p2) #0 {
-; CHECK-FP16-LABEL: test_fneg:
-; CHECK-FP16: ldrh r0, [r0]
-; CHECK-FP16-NEXT: vmov s0, r0
-; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-FP16-NEXT: vneg.f32 s0, s0
-; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: strh r0, [r1]
-; CHECK-FP16-NEXT: bx lr
-;
-; CHECK-LIBCALL-LABEL: test_fneg:
-; CHECK-LIBCALL: .save {r4, lr}
-; CHECK-LIBCALL-NEXT: push {r4, lr}
-; CHECK-LIBCALL-NEXT: ldrh r0, [r0]
-; CHECK-LIBCALL-NEXT: mov r4, r1
-; CHECK-LIBCALL-NEXT: bl __aeabi_h2f
-; CHECK-LIBCALL-NEXT: eor r0, r0, #-2147483648
-; CHECK-LIBCALL-NEXT: bl __aeabi_f2h
-; CHECK-LIBCALL-NEXT: strh r0, [r4]
-; CHECK-LIBCALL-NEXT: pop {r4, pc}
+; CHECK-ALL-LABEL: test_fneg:
+; CHECK-ALL: ldrh r0, [r0]
+; CHECK-ALL-NEXT: eor r0, r0, #32768
+; CHECK-ALL-NEXT: strh r0, [r1]
+; CHECK-ALL-NEXT: bx lr
%v = load half, ptr %p1, align 2
%res = fneg half %v
store half %res, ptr %p2, align 2
diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
index 165eb4b8af43..47e1f84ff664 100644
--- a/llvm/test/CodeGen/ARM/fp16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
@@ -16,11 +16,11 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -33,7 +33,7 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -50,11 +50,11 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -67,7 +67,7 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -84,11 +84,11 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -101,7 +101,7 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -122,19 +122,19 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -148,9 +148,9 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -169,14 +169,14 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY6]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -190,9 +190,9 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -211,19 +211,19 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -237,9 +237,9 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a..a6f0a03fc7e5 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -383,8 +383,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; SOFT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; SOFT-LABEL: utest_f16i32:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r7, lr}
; SOFT-NEXT: push {r7, lr}
@@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) {
; SOFT-NEXT: .LBB7_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
;
-; VFP2-LABEL: utesth_f16i32:
+; VFP2-LABEL: utest_f16i32:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
@@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
-; FULL-LABEL: utesth_f16i32:
+; FULL-LABEL: utest_f16i32:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.u32.f16 s0, s0
; FULL-NEXT: vmov r0, s0
@@ -3985,6 +3985,46 @@ entry:
ret i32 %spec.store.select7
}
+; i32 non saturate
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; SOFT-LABEL: ustest_f16i32_nsat:
+; SOFT: @ %bb.0:
+; SOFT-NEXT: .save {r7, lr}
+; SOFT-NEXT: push {r7, lr}
+; SOFT-NEXT: uxth r0, r0
+; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: bl __aeabi_f2iz
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: ands r0, r1
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: bics r0, r1
+; SOFT-NEXT: pop {r7, pc}
+;
+; VFP2-LABEL: ustest_f16i32_nsat:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_h2f
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: usat r0, #0, r0
+; VFP2-NEXT: pop {r7, pc}
+;
+; FULL-LABEL: ustest_f16i32_nsat:
+; FULL: @ %bb.0:
+; FULL-NEXT: vcvt.s32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: usat r0, #0, r0
+; FULL-NEXT: bx lr
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 96f009a4da02..ba31b353ee1f 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -748,8 +748,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -1366,8 +1366,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -2109,8 +2109,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64:
+; CHECK-FP16-LABEL: utest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -2835,8 +2835,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -3344,8 +3344,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -4044,8 +4044,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-LABEL: utest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -4215,6 +4215,77 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i32_nsat:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, lr}
+; CHECK-NEON-NEXT: push {r4, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s16, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s18, r4
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: vmov r0, s4
+; CHECK-NEON-NEXT: vmov.32 d11[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d10[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8
+; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: pop {r4, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s2, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1
+; CHECK-FP16-NEXT: vmovx.f16 s4, s1
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2
+; CHECK-FP16-NEXT: vmov.i32 q9, #0x0
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s2
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9
+; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll
index 7b1331f3f1e8..f44ad2a896ad 100644
--- a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll
+++ b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll
@@ -6,12 +6,19 @@
; RUN: llc <%s -mtriple=arm-none-eabi --frame-pointer=all 2>&1 \
; RUN: | FileCheck %s -check-prefix=NO_FP_ELIM
+; RUN: llc <%s -mtriple=armv6-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2
+; RUN: llc <%s -mtriple=armv6k-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2
+; RUN: llc <%s -mtriple=armv6k-apple-ios3 2>&1 | FileCheck %s -check-prefix=IOS3
+; RUN: llc <%s -mtriple=armv7-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS3
+
; CHECK: warning: inline asm clobber list contains reserved registers: SP, PC
; CHECK: warning: inline asm clobber list contains reserved registers: R11
; RWPI: warning: inline asm clobber list contains reserved registers: R9, SP, PC
; RWPI: warning: inline asm clobber list contains reserved registers: R11
; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11, SP, PC
; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11
+; IOS2: warning: inline asm clobber list contains reserved registers: R9, SP, PC
+; IOS3: warning: inline asm clobber list contains reserved registers: SP, PC
define void @foo() nounwind {
call void asm sideeffect "mov r7, #1",
diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
index c92839020f83..90142cbf6bff 100644
--- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
@@ -6,7 +6,7 @@ target triple = "armv7-eabi"
declare void @bar1()
define void @foo()#0 {
-; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 
$d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
+; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpscr_rm $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 
$d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/ARM/issue159343.ll b/llvm/test/CodeGen/ARM/issue159343.ll
new file mode 100644
index 000000000000..03292582918a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/issue159343.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+; Make sure there's no assertion from peephole-opt introducing illegal
+; subregister index uses.
+
+target triple = "thumbv7-unknown-linux-android29"
+
+define void @_ZN11VersionEdit10DecodeFromEv(i1 %call4, ptr %__profc__ZN11VersionEdit10DecodeFromEv) nounwind {
+; CHECK-LABEL: _ZN11VersionEdit10DecodeFromEv:
+; CHECK: @ %bb.0: @ %land.rhs.lr.ph
+; CHECK-NEXT: lsls r0, r0, #31
+; CHECK-NEXT: beq .LBB0_2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: adr r0, .LCPI0_0
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128]
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2: @ %select.false
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: .LBB0_3: @ %select.end
+; CHECK-NEXT: vldr s5, .LCPI0_1
+; CHECK-NEXT: vldr s4, .LCPI0_2
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vst1.64 {d2, d3}, [r1]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 1 @ 0x1
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 1 @ 0x1
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .LCPI0_1:
+; CHECK-NEXT: .long 0x00000000 @ float 0
+; CHECK-NEXT: .LCPI0_2:
+; CHECK-NEXT: .long 0x00000001 @ float 1.40129846E-45
+land.rhs.lr.ph:
+ br i1 %call4, label %sw.bb, label %while.cond.while.end_crit_edge.split.loop.exit43
+
+while.cond.while.end_crit_edge.split.loop.exit43: ; preds = %land.rhs.lr.ph
+ %ext0 = extractelement <4 x i64> zeroinitializer, i64 0
+ br label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge: ; preds = %sw.bb, %while.cond.while.end_crit_edge.split.loop.exit43
+ %pgocount5374.ph = phi i64 [ %ext1, %sw.bb ], [ %ext0, %while.cond.while.end_crit_edge.split.loop.exit43 ]
+ %ins = insertelement <2 x i64> splat (i64 1), i64 %pgocount5374.ph, i64 1
+ store <2 x i64> %ins, ptr %__profc__ZN11VersionEdit10DecodeFromEv, align 8
+ ret void
+
+sw.bb: ; preds = %land.rhs.lr.ph
+ %ext1 = extractelement <4 x i64> splat (i64 1), i64 0
+ br label %while.cond.while.end_crit_edge
+}
+
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index 749ee00a3c68..7274a8b0ce34 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,46 +1,80 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; SOFTFP-LABEL: testmsxh_builtin:
-; SOFTFP: bl llrintf
-; HARDFP-LABEL: testmsxh_builtin:
-; HARDFP: bl llrintf
define i64 @testmsxh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmsxh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: bl llrintf
+; CHECK-SOFT-NEXT: pop {r11, pc}
+;
+; CHECK-NOFP16-LABEL: testmsxh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: bl llrintf
+; CHECK-NOFP16-NEXT: pop {r11, pc}
+;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: .save {r11, lr}
+; CHECK-FPv8-NEXT: push {r11, lr}
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: bl llrintf
+; CHECK-FPv8-NEXT: pop {r11, pc}
+;
+; CHECK-FP16-LABEL: testmsxh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r11, lr}
+; CHECK-FP16-NEXT: push {r11, lr}
+; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FP16-NEXT: bl llrintf
+; CHECK-FP16-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
ret i64 %0
}
-; SOFTFP-LABEL: testmsxs_builtin:
-; SOFTFP: bl llrintf
-; HARDFP-LABEL: testmsxs_builtin:
-; HARDFP: bl llrintf
define i64 @testmsxs_builtin(float %x) {
+; CHECK-LABEL: testmsxs_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f32(float %x)
ret i64 %0
}
-; SOFTFP-LABEL: testmsxd_builtin:
-; SOFTFP: bl llrint
-; HARDFP-LABEL: testmsxd_builtin:
-; HARDFP: bl llrint
define i64 @testmsxd_builtin(double %x) {
+; CHECK-LABEL: testmsxd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrint
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f64(double %x)
ret i64 %0
}
-; FIXME(#44744): incorrect libcall
-; SOFTFP-LABEL: testmsxq_builtin:
-; SOFTFP: bl llrintl
-; HARDFP-LABEL: testmsxq_builtin:
-; HARDFP: bl llrintl
define i64 @testmsxq_builtin(fp128 %x) {
+; CHECK-LABEL: testmsxq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
ret i64 %0
}
-
-declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
-declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/llround-conv.ll b/llvm/test/CodeGen/ARM/llround-conv.ll
index 0f57e4ab50a5..f734db89af2f 100644
--- a/llvm/test/CodeGen/ARM/llround-conv.ll
+++ b/llvm/test/CodeGen/ARM/llround-conv.ll
@@ -1,25 +1,71 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+define i64 @testmsxh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmsxh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: bl llroundf
+; CHECK-SOFT-NEXT: pop {r11, pc}
+;
+; CHECK-NOFP16-LABEL: testmsxh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: bl llroundf
+; CHECK-NOFP16-NEXT: pop {r11, pc}
+;
+; CHECK-FP16-LABEL: testmsxh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r11, lr}
+; CHECK-FP16-NEXT: push {r11, lr}
+; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FP16-NEXT: bl llroundf
+; CHECK-FP16-NEXT: pop {r11, pc}
+entry:
+ %0 = tail call i64 @llvm.llround.i64.f16(half %x)
+ ret i64 %0
+}
-; SOFTFP-LABEL: testmsxs_builtin:
-; SOFTFP: bl llroundf
-; HARDFP-LABEL: testmsxs_builtin:
-; HARDFP: bl llroundf
define i64 @testmsxs_builtin(float %x) {
+; CHECK-LABEL: testmsxs_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llroundf
+; CHECK-NEXT: pop {r11, pc}
entry:
- %0 = tail call i64 @llvm.llround.f32(float %x)
+ %0 = tail call i64 @llvm.llround.i64.f32(float %x)
ret i64 %0
}
-; SOFTFP-LABEL: testmsxd_builtin:
-; SOFTFP: bl llround
-; HARDFP-LABEL: testmsxd_builtin:
-; HARDFP: bl llround
define i64 @testmsxd_builtin(double %x) {
+; CHECK-LABEL: testmsxd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llround
+; CHECK-NEXT: pop {r11, pc}
entry:
- %0 = tail call i64 @llvm.llround.f64(double %x)
+ %0 = tail call i64 @llvm.llround.i64.f64(double %x)
ret i64 %0
}
-declare i64 @llvm.llround.f32(float) nounwind readnone
-declare i64 @llvm.llround.f64(double) nounwind readnone
+define i64 @testmsxq_builtin(fp128 %x) {
+; CHECK-LABEL: testmsxq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llroundl
+; CHECK-NEXT: pop {r11, pc}
+entry:
+ %0 = tail call i64 @llvm.llround.i64.f128(fp128 %x)
+ ret i64 %0
+}
diff --git a/llvm/test/CodeGen/ARM/llvm.exp10.ll b/llvm/test/CodeGen/ARM/llvm.exp10.ll
index eb72fe8c1e1b..49397ca386cb 100644
--- a/llvm/test/CodeGen/ARM/llvm.exp10.ll
+++ b/llvm/test/CodeGen/ARM/llvm.exp10.ll
@@ -189,12 +189,13 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl exp10f
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov s17, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: vmov s18, r6
-; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
@@ -207,7 +208,6 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: mov r4, r3
@@ -216,17 +216,15 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl exp10f
-; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl exp10f
-; CHECK-NEXT: vmov s18, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: vmov s17, r7
; CHECK-NEXT: bl exp10f
-; CHECK-NEXT: vmov s16, r0
-; CHECK-NEXT: vmov r2, r3, d9
-; CHECK-NEXT: vmov r0, r1, d8
-; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %x)
diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll
index 376426d701b3..80972b75cf28 100644
--- a/llvm/test/CodeGen/ARM/llvm.frexp.ll
+++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll
@@ -362,33 +362,31 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) {
; CHECK-LABEL: test_frexp_v4f32_v4i32_only_use_fract:
; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #20
+; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: vmov s19, r0
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: add r1, sp, #8
-; CHECK-NEXT: vmov s18, r0
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: add r1, sp, #12
-; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: vmov s16, r0
-; CHECK-NEXT: vmov r2, r3, d9
-; CHECK-NEXT: vmov r0, r1, d8
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: add r1, sp, #16
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: add sp, #20
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.0 = extractvalue { <4 x float>, <4 x i32> } %result, 0
ret <4 x float> %result.0
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 9aa95112af53..2de234919a14 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,43 +1,70 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
-; SOFTFP-LABEL: testmsws_builtin:
-; SOFTFP: bl lrintf
-; HARDFP-LABEL: testmsws_builtin:
-; HARDFP: bl lrintf
define i32 @testmsws_builtin(float %x) {
+; CHECK-LABEL: testmsws_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lrintf
entry:
%0 = tail call i32 @llvm.lrint.i32.f32(float %x)
ret i32 %0
}
-; SOFTFP-LABEL: testmswd_builtin:
-; SOFTFP: bl lrint
-; HARDFP-LABEL: testmswd_builtin:
-; HARDFP: bl lrint
define i32 @testmswd_builtin(double %x) {
+; CHECK-LABEL: testmswd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lrint
entry:
%0 = tail call i32 @llvm.lrint.i32.f64(double %x)
ret i32 %0
}
-; FIXME(#44744): incorrect libcall
-; SOFTFP-LABEL: testmswq_builtin:
-; SOFTFP: bl lrintl
-; HARDFP-LABEL: testmswq_builtin:
-; HARDFP: bl lrintl
define i32 @testmswq_builtin(fp128 %x) {
+; CHECK-LABEL: testmswq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl lrintl
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
ret i32 %0
}
-
-declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
-declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/lround-conv.ll b/llvm/test/CodeGen/ARM/lround-conv.ll
index 3aaed74830b8..03f7a0d7a44c 100644
--- a/llvm/test/CodeGen/ARM/lround-conv.ll
+++ b/llvm/test/CodeGen/ARM/lround-conv.ll
@@ -1,25 +1,47 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+;define i32 @testmswh_builtin(half %x) {
+;entry:
+; %0 = tail call i32 @llvm.lround.i32.f16(half %x)
+; ret i32 %0
+;}
-; SOFTFP-LABEL: testmsws_builtin:
-; SOFTFP: bl lroundf
-; HARDFP-LABEL: testmsws_builtin:
-; HARDFP: bl lroundf
define i32 @testmsws_builtin(float %x) {
+; CHECK-LABEL: testmsws_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lroundf
entry:
%0 = tail call i32 @llvm.lround.i32.f32(float %x)
ret i32 %0
}
-; SOFTFP-LABEL: testmswd_builtin:
-; SOFTFP: bl lround
-; HARDFP-LABEL: testmswd_builtin:
-; HARDFP: bl lround
define i32 @testmswd_builtin(double %x) {
+; CHECK-LABEL: testmswd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lround
entry:
%0 = tail call i32 @llvm.lround.i32.f64(double %x)
ret i32 %0
}
-declare i32 @llvm.lround.i32.f32(float) nounwind readnone
-declare i32 @llvm.lround.i32.f64(double) nounwind readnone
+define i32 @testmswq_builtin(fp128 %x) {
+; CHECK-LABEL: testmswq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl lroundl
+; CHECK-NEXT: pop {r11, pc}
+entry:
+ %0 = tail call i32 @llvm.lround.i32.f128(fp128 %x)
+ ret i32 %0
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-FP16: {{.*}}
+; CHECK-FPv8: {{.*}}
+; CHECK-NOFP16: {{.*}}
+; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
index 46f3e4b08559..17d66196505a 100644
--- a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
+++ b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
@@ -14,7 +14,7 @@
# CHECK: SU(1): %1:dpr = VABSD %0:dpr, 14, $noreg
# CHECK: SU(2): %2:dpr = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool)
# CHECK: SU(4): %3:rgpr = t2MOVi 0, 14, $noreg, $noreg
-# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv
+# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
# CHECK: SU(5): $r0 = COPY %3:rgpr
---
name: test
@@ -29,7 +29,7 @@ body: |
%0:dpr = COPY $d0
%1:dpr = VABSD %0, 14 /* CC::al */, $noreg
%2:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool)
- VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv
+ VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
%4:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
$r0 = COPY %4
tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
diff --git a/llvm/test/CodeGen/ARM/nnan-fsub.ll b/llvm/test/CodeGen/ARM/nnan-fsub.ll
index 01839083547b..78dd36f95491 100644
--- a/llvm/test/CodeGen/ARM/nnan-fsub.ll
+++ b/llvm/test/CodeGen/ARM/nnan-fsub.ll
@@ -1,18 +1,22 @@
-; RUN: llc -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s
-; RUN: llc -mcpu=cortex-a9 --enable-no-nans-fp-math < %s | FileCheck -check-prefix=FAST %s
+; RUN: llc -mcpu=cortex-a9 < %s | FileCheck %s
target triple = "armv7-apple-ios"
-; SAFE: test
-; FAST: test
+; CHECK-LABEL: test:
define float @test(float %x, float %y) {
entry:
-; SAFE: vmul.f32
-; SAFE: vsub.f32
-; FAST: mov r0, #0
+; CHECK: vmul.f32
+; CHECK-NEXT: vsub.f32
%0 = fmul float %x, %y
%1 = fsub float %0, %0
ret float %1
}
-
+; CHECK-LABEL: test_nnan:
+define float @test_nnan(float %x, float %y) {
+entry:
+; CHECK: mov r0, #0
+ %0 = fmul float %x, %y
+ %1 = fsub nnan float %0, %0
+ ret float %1
+}
diff --git a/llvm/test/CodeGen/ARM/pr159343.mir b/llvm/test/CodeGen/ARM/pr159343.mir
new file mode 100644
index 000000000000..9b71b1ad94b2
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr159343.mir
@@ -0,0 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -mtriple=thumbv7-unknown-linux-android29 %s -o - | FileCheck %s
+---
+name: Test_shouldRewriteCopySrc_Invalid_SubReg
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $r0, $r1
+
+ ; CHECK-LABEL: name: Test_shouldRewriteCopySrc_Invalid_SubReg
+ ; CHECK: liveins: $r0, $r1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:dpair = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr_vfp2 = COPY [[DEF]].dsub_0
+ ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]].ssub_1
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mqpr = REG_SEQUENCE killed [[DEF2]], %subreg.ssub_0, killed [[DEF1]], %subreg.ssub_1, killed [[DEF3]], %subreg.ssub_2, [[COPY]].ssub_1, %subreg.ssub_3
+ ; CHECK-NEXT: VST1q64 $r1, 0, killed [[REG_SEQUENCE]], 14 /* CC::al */, $noreg
+ %0:dpair = IMPLICIT_DEF
+ %1:dpr = COPY %0.dsub_0
+ %2:gpr, %3:gpr = VMOVRRD killed %1, 14 /* CC::al */, $noreg
+ %4:spr = VMOVSR killed %3, 14 /* CC::al */, $noreg
+ %5:spr = IMPLICIT_DEF
+ %6:spr = IMPLICIT_DEF
+ %7:spr = IMPLICIT_DEF
+ %8:mqpr = REG_SEQUENCE killed %6, %subreg.ssub_0, killed %5, %subreg.ssub_1, killed %7, %subreg.ssub_2, killed %4, %subreg.ssub_3
+ VST1q64 $r1, 0, killed %8, 14 /* CC::al */, $noreg
+...
diff --git a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
index e653aaa316fe..2bf8f29eccb4 100644
--- a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
+++ b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
@@ -12,8 +12,8 @@ define float @shouldRewriteCopySrc(double %arg) #0 {
; CHECK-NEXT: @APP
; CHECK-NEXT: nop
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov.f64 d0, d16
+; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $d0
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: bx lr
bb:
diff --git a/llvm/test/CodeGen/ARM/sincos.ll b/llvm/test/CodeGen/ARM/sincos.ll
index e1b683a8a665..1a4313e651d7 100644
--- a/llvm/test/CodeGen/ARM/sincos.ll
+++ b/llvm/test/CodeGen/ARM/sincos.ll
@@ -2,8 +2,7 @@
; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS
; RUN: llc < %s -mtriple=armv7-linux-gnu -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU
-; RUN: llc < %s -mtriple=armv7-linux-android -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT-ANDROID
-; RUN: llc < %s -mtriple=armv7-linux-android9 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU
+; RUN: llc < %s -mtriple=armv7-linux-android -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU
; Combine sin / cos into a single call unless they may write errno (as
; captured by readnone attribute, controlled by clang -fmath-errno
@@ -22,10 +21,6 @@ entry:
; NOOPT: bl _sinf
; NOOPT: bl _cosf
-; NOOPT-ANDROID-LABEL: test1:
-; NOOPT-ANDROID: bl sinf
-; NOOPT-ANDROID: bl cosf
-
%call = tail call float @sinf(float %x) readnone
%call1 = tail call float @cosf(float %x) readnone
%add = fadd float %call, %call1
@@ -44,10 +39,6 @@ entry:
; NOOPT: bl _sinf
; NOOPT: bl _cosf
-; NOOPT-ANDROID-LABEL: test1_fast:
-; NOOPT-ANDROID: bl sinf
-; NOOPT-ANDROID: bl cosf
-
%call = tail call fast float @sinf(float %x) readnone
%call1 = tail call fast float @cosf(float %x) readnone
%add = fadd float %call, %call1
@@ -68,10 +59,6 @@ entry:
; NOOPT: bl _sinf
; NOOPT: bl _cosf
-; NOOPT-ANDROID-LABEL: test1_errno:
-; NOOPT-ANDROID: bl sinf
-; NOOPT-ANDROID: bl cosf
-
%call = tail call float @sinf(float %x)
%call1 = tail call float @cosf(float %x)
%add = fadd float %call, %call1
@@ -90,10 +77,6 @@ entry:
; NOOPT: bl _sin
; NOOPT: bl _cos
-; NOOPT-ANDROID-LABEL: test2:
-; NOOPT-ANDROID: bl sin
-; NOOPT-ANDROID: bl cos
-
%call = tail call double @sin(double %x) readnone
%call1 = tail call double @cos(double %x) readnone
%add = fadd double %call, %call1
@@ -112,10 +95,6 @@ entry:
; NOOPT: bl _sin
; NOOPT: bl _cos
-; NOOPT-ANDROID-LABEL: test2_fast:
-; NOOPT-ANDROID: bl sin
-; NOOPT-ANDROID: bl cos
-
%call = tail call fast double @sin(double %x) readnone
%call1 = tail call fast double @cos(double %x) readnone
%add = fadd double %call, %call1
@@ -136,10 +115,6 @@ entry:
; NOOPT: bl _sin
; NOOPT: bl _cos
-; NOOPT-ANDROID-LABEL: test2_errno:
-; NOOPT-ANDROID: bl sin
-; NOOPT-ANDROID: bl cos
-
%call = tail call double @sin(double %x)
%call1 = tail call double @cos(double %x)
%add = fadd double %call, %call1
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index fe5e3cbcdf77..c3c88840b1a6 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,36 +9,1290 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
-; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
-; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
-; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vrev64.32 d3, d8
+; BE-I64-NEXT: vrev64.32 d0, d10
+; BE-I64-NEXT: vrev64.32 d2, d16
+; BE-I64-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+ ret <4 x iXLen> %a
+}
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
-; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vmov r0, s7
+; LE-I32-NEXT: vmov.f32 s18, s6
+; LE-I32-NEXT: vmov.f32 s16, s5
+; LE-I32-NEXT: vmov.f32 s20, s4
+; LE-I32-NEXT: vmov.f32 s22, s3
+; LE-I32-NEXT: vmov.f32 s24, s2
+; LE-I32-NEXT: vmov.f32 s26, s1
+; LE-I32-NEXT: vmov.f32 s28, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s16, s7
+; LE-I64-NEXT: vmov.f32 s18, s5
+; LE-I64-NEXT: vmov.f32 s20, s4
+; LE-I64-NEXT: vmov.f32 s22, s3
+; LE-I64-NEXT: vmov.f32 s24, s2
+; LE-I64-NEXT: vmov.f32 s26, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s22
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s24
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q2, q6, q6
+; LE-I64-NEXT: vorr q3, q5, q5
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vmov.f32 s18, s7
+; BE-I32-NEXT: vmov.f32 s20, s6
+; BE-I32-NEXT: vmov.f32 s16, s5
+; BE-I32-NEXT: vmov.f32 s22, s4
+; BE-I32-NEXT: vmov.f32 s24, s3
+; BE-I32-NEXT: vmov.f32 s26, s2
+; BE-I32-NEXT: vmov.f32 s28, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s22
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #8
+; BE-I64-NEXT: sub sp, sp, #8
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s18, s7
+; BE-I64-NEXT: vmov.f32 s16, s6
+; BE-I64-NEXT: vmov.f32 s20, s5
+; BE-I64-NEXT: vmov.f32 s22, s4
+; BE-I64-NEXT: vmov.f32 s24, s3
+; BE-I64-NEXT: vmov.f32 s26, s2
+; BE-I64-NEXT: vmov.f32 s28, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s28
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vmov r0, s24
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s26
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s22
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r10
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d3, d13
+; BE-I64-NEXT: vrev64.32 d5, d11
+; BE-I64-NEXT: vrev64.32 d7, d9
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vrev64.32 d4, d10
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #8
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+ ret <8 x iXLen> %a
+}
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-; ret <16 x iXLen> %a
-; }
-; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #8
+; LE-I32-NEXT: sub sp, sp, #8
+; LE-I32-NEXT: vmov r0, s15
+; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s26, s14
+; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s20, s12
+; LE-I32-NEXT: vmov.f32 s22, s11
+; LE-I32-NEXT: vmov.f32 s18, s10
+; LE-I32-NEXT: vmov.f32 s17, s9
+; LE-I32-NEXT: vmov.f32 s24, s8
+; LE-I32-NEXT: vmov.f32 s19, s7
+; LE-I32-NEXT: vmov.f32 s30, s6
+; LE-I32-NEXT: vmov.f32 s21, s5
+; LE-I32-NEXT: vmov.f32 s16, s4
+; LE-I32-NEXT: vmov.f32 s23, s3
+; LE-I32-NEXT: vmov.f32 s28, s2
+; LE-I32-NEXT: vmov.f32 s25, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s17
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vmov r0, s30
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q6, q6
+; LE-I32-NEXT: vorr q3, q5, q5
+; LE-I32-NEXT: add sp, sp, #8
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #120
+; LE-I64-NEXT: sub sp, sp, #120
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vmov r0, s7
+; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s23, s13
+; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s25, s12
+; LE-I64-NEXT: vmov.f32 s27, s11
+; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s24, s8
+; LE-I64-NEXT: vmov.f32 s19, s6
+; LE-I64-NEXT: vmov.f32 s29, s5
+; LE-I64-NEXT: vmov.f32 s17, s4
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s21, s2
+; LE-I64-NEXT: vmov.f32 s26, s1
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s25
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s27
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s29
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov r0, s23
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: vmov r8, s21
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov r10, s19
+; LE-I64-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov r8, s24
+; LE-I64-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov s24, r5
+; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov s24, r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov s18, r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #120
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s30, s15
+; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s17, s12
+; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s19, s11
+; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s9
+; BE-I32-NEXT: vmov.f32 s23, s7
+; BE-I32-NEXT: vmov.f32 s24, s6
+; BE-I32-NEXT: vmov.f32 s25, s5
+; BE-I32-NEXT: vmov.f32 s26, s4
+; BE-I32-NEXT: vmov.f32 s27, s3
+; BE-I32-NEXT: vmov.f32 s28, s2
+; BE-I32-NEXT: vmov.f32 s29, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s30
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s17
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q4
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #112
+; BE-I64-NEXT: sub sp, sp, #112
+; BE-I64-NEXT: mov r11, r0
+; BE-I64-NEXT: vmov r0, s14
+; BE-I64-NEXT: vmov.f32 s17, s15
+; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s21, s12
+; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s23, s11
+; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s19, s9
+; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s26, s8
+; BE-I64-NEXT: vmov.f32 s24, s6
+; BE-I64-NEXT: vmov.f32 s18, s5
+; BE-I64-NEXT: vmov.f32 s25, s3
+; BE-I64-NEXT: vmov.f32 s16, s2
+; BE-I64-NEXT: vmov.f32 s27, s1
+; BE-I64-NEXT: vmov.f32 s29, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r8, r0
+; BE-I64-NEXT: vmov r0, s29
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s27
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s21
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s25
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s23
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r9
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov r0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEXT: vmov r6, s19
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT: vmov r5, s26
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s26, r4
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEXT: vmov r4, s24
+; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d26
+; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEXT: add r0, r11, #64
+; BE-I64-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vrev64.32 d29, d10
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d12
+; BE-I64-NEXT: vrev64.32 d28, d23
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d22, d30
+; BE-I64-NEXT: vrev64.32 d31, d25
+; BE-I64-NEXT: vrev64.32 d0, d9
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d19, d13
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d18, d14
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT: add sp, sp, #112
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+ ret <16 x iXLen> %a
+}
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; LE-I32-LABEL: lrint_v1f32:
@@ -76,7 +1330,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
ret <1 x iXLen> %a
}
-declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; LE-I32-LABEL: lrint_v2f32:
@@ -160,7 +1413,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
ret <2 x iXLen> %a
}
-declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; LE-I32-LABEL: lrint_v4f32:
@@ -274,7 +1526,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
ret <4 x iXLen> %a
}
-declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; LE-I32-LABEL: lrint_v8f32:
@@ -488,7 +1739,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
ret <8 x iXLen> %a
}
-declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; LE-I32-LABEL: lrint_v16f32:
@@ -1005,7 +2255,6 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
ret <16 x iXLen> %a
}
-declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; LE-I32-LABEL: lrint_v1f64:
@@ -1043,7 +2292,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
ret <1 x iXLen> %a
}
-declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; LE-I32-LABEL: lrint_v2f64:
@@ -1120,7 +2368,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
ret <2 x iXLen> %a
}
-declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; LE-I32-LABEL: lrint_v4f64:
@@ -1237,7 +2484,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
ret <4 x iXLen> %a
}
-declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; LE-I32-LABEL: lrint_v8f64:
@@ -1467,7 +2713,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
ret <8 x iXLen> %a
}
-declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
; LE-I32-LABEL: lrint_v16f64:
@@ -2053,7 +3298,6 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
ret <16 x iXLen> %a
}
-declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
; LE-I32-LABEL: lrint_v1fp128:
@@ -2091,7 +3335,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
%a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
ret <1 x iXLen> %a
}
-declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
; LE-I32-LABEL: lrint_v2fp128:
@@ -2194,7 +3437,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
%a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
ret <2 x iXLen> %a
}
-declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
; LE-I32-LABEL: lrint_v4fp128:
@@ -2347,7 +3589,6 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
%a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
ret <4 x iXLen> %a
}
-declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
; LE-I32-LABEL: lrint_v8fp128:
@@ -2664,7 +3905,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
%a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
ret <8 x iXLen> %a
}
-declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
; LE-I32-LABEL: lrint_v16fp128:
@@ -3262,4 +4502,3 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
%a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
ret <16 x iXLen> %a
}
-declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index 8fa9337eae6c..03cb8e37844c 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -60,9 +60,9 @@ body: |
$sp = t2STMDB_UPD $sp, 14, $noreg, $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11
$r4 = t2BICri $r4, 1, 14, $noreg, $noreg
$sp = tSUBspi $sp, 34, 14, $noreg
- VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
+ VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $fpscr_rm, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7
- VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
+ VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
$sp = tADDspi $sp, 34, 14, $noreg
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc