summaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll168
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll6
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll14
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll8
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll6
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll70
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll9
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll10
-rw-r--r--llvm/test/Transforms/LoopVectorize/blend-in-header.ll4
-rw-r--r--llvm/test/Transforms/LoopVectorize/if-pred-stores.ll18
-rw-r--r--llvm/test/Transforms/LoopVectorize/induction.ll25
-rw-r--r--llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll6
-rw-r--r--llvm/test/Transforms/LoopVectorize/pr37248.ll12
-rw-r--r--llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll14
-rw-r--r--llvm/test/Transforms/LoopVectorize/reduction-small-size.ll6
-rw-r--r--llvm/test/Transforms/LoopVectorize/select-cmp.ll24
-rw-r--r--llvm/test/Transforms/LoopVectorize/single_early_exit.ll4
-rw-r--r--llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll52
-rw-r--r--llvm/test/Transforms/LoopVectorize/uniform-blend.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll539
22 files changed, 763 insertions, 238 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index ddf6c1005e05..254cdf2d14d9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -209,6 +209,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
@@ -218,7 +219,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 08a600143190..8c5d84e6981b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -151,9 +151,9 @@ exit:
ret void
}
-define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4, ptr %src, ptr %dst.3, i1 %c.3, ptr %dst.2) {
+define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.ptr, ptr %dst.1, i1 %c.4, ptr %src, ptr %dst.3, i1 %c.3, ptr %dst.2) {
; CHECK-LABEL: define void @test_exit_branch_cost(
-; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i32 [[Y:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) {
+; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[X_PTR:%.*]], ptr noalias [[Y_PTR:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
@@ -172,11 +172,11 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4
; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]]
-; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]]
+; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]]
; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP4]]
; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
-; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]]
+; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX21]], [[FOUND_CONFLICT14]]
; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP1]]
; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]]
@@ -184,161 +184,101 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4
; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]]
-; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]]
+; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]]
; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP4]]
; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]]
-; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]]
+; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX41]], [[FOUND_CONFLICT26]]
; CHECK-NEXT: [[BOUND028:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND129:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[FOUND_CONFLICT30:%.*]] = and i1 [[BOUND028]], [[BOUND129]]
-; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]]
+; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]]
; CHECK-NEXT: [[BOUND032:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP4]]
; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP2]]
-; CHECK-NEXT: [[FOUND_CONFLICT34:%.*]] = and i1 [[BOUND032]], [[BOUND133]]
-; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX31]], [[FOUND_CONFLICT34]]
+; CHECK-NEXT: [[FOUND_CONFLICT68:%.*]] = and i1 [[BOUND032]], [[BOUND133]]
+; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX65]], [[FOUND_CONFLICT68]]
; CHECK-NEXT: [[BOUND036:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP4]]
; CHECK-NEXT: [[BOUND137:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP3]]
; CHECK-NEXT: [[FOUND_CONFLICT38:%.*]] = and i1 [[BOUND036]], [[BOUND137]]
; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX35]], [[FOUND_CONFLICT38]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX39]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE74:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true)
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META10:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
+; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]]
; CHECK: [[PRED_STORE_IF42]]:
; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]]
; CHECK: [[PRED_STORE_CONTINUE43]]:
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
+; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]]
; CHECK: [[PRED_STORE_IF44]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]]
+; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]]
; CHECK: [[PRED_STORE_CONTINUE45]]:
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
+; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]]
; CHECK: [[PRED_STORE_IF46]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]]
+; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]]
; CHECK: [[PRED_STORE_CONTINUE47]]:
-; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
-; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
-; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]]
-; CHECK: [[PRED_STORE_IF48]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]]
-; CHECK: [[PRED_STORE_CONTINUE49]]:
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
-; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]]
-; CHECK: [[PRED_STORE_IF50]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]]
-; CHECK: [[PRED_STORE_CONTINUE51]]:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
-; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]]
-; CHECK: [[PRED_STORE_IF52]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]]
-; CHECK: [[PRED_STORE_CONTINUE53]]:
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
-; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55:.*]]
-; CHECK: [[PRED_STORE_IF54]]:
-; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]]
-; CHECK: [[PRED_STORE_CONTINUE55]]:
-; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1)
; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1)
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0
-; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
-; CHECK: [[PRED_STORE_IF59]]:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 0
-; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE60]]
-; CHECK: [[PRED_STORE_CONTINUE60]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP22]], i32 1
-; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF61:.*]], label %[[PRED_STORE_CONTINUE62:.*]]
-; CHECK: [[PRED_STORE_IF61]]:
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 1
-; CHECK-NEXT: store i64 [[TMP27]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE62]]
-; CHECK: [[PRED_STORE_CONTINUE62]]:
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0
-; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF63:.*]], label %[[PRED_STORE_CONTINUE64:.*]]
-; CHECK: [[PRED_STORE_IF63]]:
+; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]]
+; CHECK: [[PRED_STORE_IF48]]:
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0
-; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE64]]
-; CHECK: [[PRED_STORE_CONTINUE64]]:
+; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]]
+; CHECK: [[PRED_STORE_CONTINUE49]]:
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1
-; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF65:.*]], label %[[PRED_STORE_CONTINUE66:.*]]
-; CHECK: [[PRED_STORE_IF65]]:
+; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]]
+; CHECK: [[PRED_STORE_IF50]]:
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1
; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE66]]
-; CHECK: [[PRED_STORE_CONTINUE66]]:
-; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true)
-; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true)
-; CHECK-NEXT: [[TMP34:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP32]], <2 x i1> zeroinitializer
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]]
+; CHECK: [[PRED_STORE_CONTINUE51]]:
; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP36:%.*]] = or <2 x i1> [[TMP22]], [[TMP34]]
; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0
-; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]]
-; CHECK: [[PRED_STORE_IF67]]:
-; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]]
-; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE68]]
-; CHECK: [[PRED_STORE_CONTINUE68]]:
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1
-; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]]
-; CHECK: [[PRED_STORE_IF69]]:
-; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE70]]
-; CHECK: [[PRED_STORE_CONTINUE70]]:
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0
-; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]]
-; CHECK: [[PRED_STORE_IF71]]:
-; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE72]]
-; CHECK: [[PRED_STORE_CONTINUE72]]:
+; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]]
+; CHECK: [[PRED_STORE_IF52]]:
+; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]]
+; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]]
+; CHECK: [[PRED_STORE_CONTINUE53]]:
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1
-; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]]
-; CHECK: [[PRED_STORE_IF73]]:
-; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE74]]
-; CHECK: [[PRED_STORE_CONTINUE74]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]]
+; CHECK: [[PRED_STORE_IF54]]:
+; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
+; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]]
+; CHECK: [[PRED_STORE_CONTINUE55]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -348,6 +288,10 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[X_GEP:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[IV]]
+; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[X_GEP]], align 8
+; CHECK-NEXT: [[Y_GEP:%.*]] = getelementptr i32, ptr [[Y_PTR]], i64 [[IV]]
+; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Y_GEP]], align 4
; CHECK-NEXT: [[C1:%.*]] = icmp eq i64 [[X]], 0
; CHECK-NEXT: br i1 [[C1]], label %[[THEN_4:.*]], label %[[THEN_1:.*]]
; CHECK: [[THEN_1]]:
@@ -386,6 +330,10 @@ entry:
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
+ %x.gep = getelementptr i64, ptr %x.ptr, i64 %iv
+ %x = load i64, ptr %x.gep
+ %y.gep = getelementptr i32, ptr %y.ptr, i64 %iv
+ %y = load i32, ptr %y.gep
%c1 = icmp eq i64 %x, 0
br i1 %c1, label %then.4, label %then.1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll
index f9c1ab4a8181..3d00c228baf5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll
@@ -75,17 +75,17 @@ define i32 @any_of_reduction_used_in_blend_with_mutliple_phis(ptr %src, i64 %N,
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[SRC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT4]], i32 8, <vscale x 2 x i1> [[TMP8]], <vscale x 2 x ptr> poison)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_MASKED_GATHER]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = or <vscale x 2 x i1> [[VEC_PHI]], [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index a7765f47180d..038e726adc24 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -432,6 +432,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -439,7 +440,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
; CHECK-NEXT: [[TMP11:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], align 8
@@ -477,6 +477,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -486,10 +487,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
-; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP5]]
-; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]]
+; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]]
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8
@@ -560,6 +559,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -567,7 +567,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], align 8
@@ -605,6 +604,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -614,10 +614,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
-; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP5]]
-; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]]
+; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]]
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index a330b6964a66..f323231445aa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -37,16 +37,16 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP18]]
-; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
-; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]]
; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
index 66bb9357750c..3d23090dd123 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll
@@ -4,8 +4,6 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; FIXME: GEP flags on GEPs for reverse vector pointer need to be dropped when folding the tail.
-
define i1 @fn(ptr %nno) #0 {
; CHECK-LABEL: define i1 @fn(
; CHECK-SAME: ptr [[NNO:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -26,8 +24,8 @@ define i1 @fn(ptr %nno) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[REVERSE]], <4 x i32> poison)
; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
index 270e6bcd9ab1..1a9e7ddb965f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
@@ -14,72 +14,70 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT16]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE15:.*]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE15]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE15]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE17:.*]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE17]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE17]] ]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
; CHECK: [[PRED_SDIV_IF]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
; CHECK: [[PRED_SDIV_CONTINUE]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF2:.*]], label %[[PRED_SDIV_CONTINUE3:.*]]
-; CHECK: [[PRED_SDIV_IF2]]:
-; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE3]]
-; CHECK: [[PRED_SDIV_CONTINUE3]]:
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
; CHECK: [[PRED_SDIV_IF4]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE5]]
; CHECK: [[PRED_SDIV_CONTINUE5]]:
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]]
; CHECK: [[PRED_SDIV_IF6]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE7]]
; CHECK: [[PRED_SDIV_CONTINUE7]]:
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]]
; CHECK: [[PRED_SDIV_IF8]]:
-; CHECK-NEXT: [[TMP7:%.*]] = sdiv i16 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE9]]
; CHECK: [[PRED_SDIV_CONTINUE9]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[PRED_SDIV_CONTINUE7]] ], [ [[TMP8]], %[[PRED_SDIV_IF8]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]]
; CHECK: [[PRED_SDIV_IF10]]:
-; CHECK-NEXT: [[TMP11:%.*]] = sdiv i16 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = sdiv i16 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE11]]
; CHECK: [[PRED_SDIV_CONTINUE11]]:
-; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_SDIV_CONTINUE9]] ], [ [[TMP12]], %[[PRED_SDIV_IF10]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF12:.*]], label %[[PRED_SDIV_CONTINUE13:.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[PRED_SDIV_CONTINUE9]] ], [ [[TMP8]], %[[PRED_SDIV_IF10]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_SDIV_IF12:.*]], label %[[PRED_SDIV_CONTINUE13:.*]]
; CHECK: [[PRED_SDIV_IF12]]:
-; CHECK-NEXT: [[TMP15:%.*]] = sdiv i16 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP15]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv i16 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP11]], i32 1
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE13]]
; CHECK: [[PRED_SDIV_CONTINUE13]]:
-; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i16> [ [[TMP13]], %[[PRED_SDIV_CONTINUE11]] ], [ [[TMP16]], %[[PRED_SDIV_IF12]] ]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF14:.*]], label %[[PRED_SDIV_CONTINUE15]]
+; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_SDIV_CONTINUE11]] ], [ [[TMP12]], %[[PRED_SDIV_IF12]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF14:.*]], label %[[PRED_SDIV_CONTINUE15:.*]]
; CHECK: [[PRED_SDIV_IF14]]:
-; CHECK-NEXT: [[TMP19:%.*]] = sdiv i16 [[X]], [[Y]]
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP19]], i32 3
+; CHECK-NEXT: [[TMP15:%.*]] = sdiv i16 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP15]], i32 2
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE15]]
; CHECK: [[PRED_SDIV_CONTINUE15]]:
-; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP20]], %[[PRED_SDIV_IF14]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i16> [ [[TMP13]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP16]], %[[PRED_SDIV_IF14]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF16:.*]], label %[[PRED_SDIV_CONTINUE17]]
+; CHECK: [[PRED_SDIV_IF16]]:
+; CHECK-NEXT: [[TMP19:%.*]] = sdiv i16 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP19]], i32 3
+; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE17]]
+; CHECK: [[PRED_SDIV_CONTINUE17]]:
+; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE15]] ], [ [[TMP20]], %[[PRED_SDIV_IF16]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i16> zeroinitializer, <4 x i16> [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true)
-; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true)
; CHECK-NEXT: [[TMP24]] = or <4 x i1> [[VEC_PHI]], [[TMP22]]
-; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI1]], [[TMP23]]
+; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI3]], [[TMP22]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
index 8d56c3386a3b..cfae26a3a425 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
@@ -15,19 +15,18 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
; CHECK: [[PRED_UREM_IF]]:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
; CHECK: [[PRED_UREM_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
; CHECK: [[PRED_UREM_IF1]]:
; CHECK-NEXT: [[TMP6:%.*]] = urem i64 [[MUL]], [[X]]
@@ -48,7 +47,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK: [[PRED_UREM_CONTINUE6]]:
; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0)
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index c14c34cade6b..a0294f7ac799 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -129,6 +129,7 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]]
; CHECK: vector.body29:
; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ]
@@ -138,7 +139,6 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]]
; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0
-; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]])
; CHECK-NEXT: [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1)
; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]]
@@ -173,16 +173,17 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2
; CHECK-NEXT: [[IND_END54:%.*]] = add i64 8, [[TMP43]]
; CHECK-NEXT: [[IND_END57:%.*]] = mul i64 [[N_VEC53]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT50:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT50]], <8 x i1> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true)
; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0
; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0
; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY50:%.*]]
-; CHECK: vec.epilog.vector.body50:
+; CHECK: vec.epilog.vector.body52:
; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]
; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]
; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]
@@ -190,7 +191,6 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]]
; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0
-; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true)
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]])
; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1)
; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]]
diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
index 4c95584ff253..2fea016218e6 100644
--- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
+++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
@@ -171,11 +171,11 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) {
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index c4509e4ad56e..7db53d8ffced 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -172,6 +172,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2
; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
; UNROLL-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
+; UNROLL-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2:%.*]], true
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
@@ -184,7 +185,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]]
; UNROLL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
+; UNROLL-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4
@@ -192,10 +193,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL: pred.store.continue3:
; UNROLL-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NEXT: [[TMP12:%.*]] = add i32 [[VEC_PHI1]], 1
-; UNROLL-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true
-; UNROLL-NEXT: [[TMP14:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NEXT: [[PREDPHI]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
-; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]]
+; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP12]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -244,6 +243,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP12:%.*]] = xor i1 [[COND_2:%.*]], true
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
@@ -256,7 +256,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP5]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
-; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -268,10 +268,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI1]], 1
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP12:%.*]] = xor i1 [[COND_2]], true
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 [[TMP12]], i32 [[VEC_PHI]], i32 [[TMP10]]
-; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP11]]
+; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP12]], i32 [[VEC_PHI1]], i32 [[TMP11]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -321,9 +319,10 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
-; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
@@ -351,7 +350,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1)
-; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 2175eab9752c..96311de673d8 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -1962,6 +1962,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
@@ -1989,7 +1990,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]]
; CHECK: pred.udiv.continue2:
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP13]]
; CHECK-NEXT: [[TMP15]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -2030,6 +2030,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; IND: vector.ph:
; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646
; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
+; IND-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
+; IND-NEXT: [[TMP12:%.*]] = shufflevector <2 x i1> [[TMP11]], <2 x i1> poison, <2 x i32> zeroinitializer
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
@@ -2054,8 +2056,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]]
; IND: pred.udiv.continue2:
; IND-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP5]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ]
-; IND-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
-; IND-NEXT: [[TMP12:%.*]] = shufflevector <2 x i1> [[TMP11]], <2 x i1> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP10]]
; IND-NEXT: [[TMP13]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -2097,7 +2097,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
-; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; UNROLL-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
+; UNROLL-NEXT: [[TMP28:%.*]] = shufflevector <2 x i1> [[TMP27]], <2 x i1> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
@@ -2143,8 +2144,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; UNROLL: pred.udiv.continue8:
; UNROLL-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ]
-; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]]
-; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP21]], <2 x i32> [[WIDE_LOAD2]]
+; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]]
+; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP21]]
; UNROLL-NEXT: [[TMP22]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
; UNROLL-NEXT: [[TMP23]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -2189,6 +2190,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
@@ -2239,10 +2241,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; UNROLL-NO-IC: pred.udiv.continue8:
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ [[TMP20]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_UDIV_IF7]] ]
-; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]]
-; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]]
+; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]]
; UNROLL-NO-IC-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
; UNROLL-NO-IC-NEXT: [[TMP30]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -2284,7 +2284,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE: vector.ph:
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640
; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
-; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; INTERLEAVE-NEXT: [[TMP47:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison, i1 poison, i1 poison>
+; INTERLEAVE-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP47]], <4 x i1> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ]
@@ -2366,8 +2367,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE16]]
; INTERLEAVE: pred.udiv.continue16:
; INTERLEAVE-NEXT: [[TMP41:%.*]] = phi <4 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ]
-; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP21]], <4 x i32> [[WIDE_LOAD]]
-; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP41]], <4 x i32> [[WIDE_LOAD2]]
+; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP21]]
+; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[TMP41]]
; INTERLEAVE-NEXT: [[TMP42]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]]
; INTERLEAVE-NEXT: [[TMP43]] = add <4 x i32> [[PREDPHI17]], [[VEC_PHI1]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
index bc1c1bf04a37..e8ad6a38d742 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
@@ -134,12 +134,12 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 3
-; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 3
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 0
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT6]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index ed7762fbc2ba..fe660a826967 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -41,26 +41,26 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[START]], [[N_VEC]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
+; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.if2:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue3:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
index 4f47e66816c9..a129a4b1928c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
@@ -6,25 +6,25 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
; CHECK-NEXT: bb:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT5]], splat (i32 1)
+; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 35902, i32 0>, [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10)
-; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true)
-; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]]
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index 8a8439fca439..ca971f15e487 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -89,13 +89,13 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1)
-; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]]
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 301526cf3070..550e52d31823 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -1006,11 +1006,11 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
+; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1048,6 +1048,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1055,13 +1056,9 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
-; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
-; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP1]]
-; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP2]]
-; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
+; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP4]]
+; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1100,6 +1097,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3
+; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1107,13 +1105,9 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true
-; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = xor i1 [[TMP0]], true
-; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = xor i1 [[TMP0]], true
-; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true
-; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP1]]
-; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP2]]
-; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP3]]
+; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP4]]
+; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP4]]
+; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP4]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
index 7590bb9d6868..4ba9cc661313 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
@@ -281,12 +281,12 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) {
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
-; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index 31732f027f6d..892ddccbc93b 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -10,12 +10,12 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll
new file mode 100644
index 000000000000..252061335e73
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll
@@ -0,0 +1,52 @@
+; REQUIRES: asserts
+; RUN: opt -p loop-vectorize -debug %s 2>&1 | FileCheck %s
+
+
+; CHECK-LABEL: LV: Checking a loop in 'latch_exit_cannot_compute_btc_due_to_step'
+; CHECK: LV: Did not find one integer induction var.
+; CHECK-NEXT: LV: Not vectorizing: Early exit is not the latch predecessor.
+; CHECK-NEXT: LV: Interleaving disabled by the pass manager
+; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
+
+; CHECK-LABEL: LV: Checking a loop in 'header_exit_cannot_compute_btc_due_to_step'
+; CHECK: LV: Found an induction variable.
+; CHECK-NEXT: LV: Did not find one integer induction var.
+; CHECK-NEXT: LV: Not vectorizing: Cannot determine exact exit count for latch block.
+; CHECK-NEXT: LV: Interleaving disabled by the pass manager
+; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
+
+; CHECK-NOT: vector.body
+define void @latch_exit_cannot_compute_btc_due_to_step(ptr %dst, i64 %step) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %for.cond.us
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i64 %iv, %step
+ %gep = getelementptr i8, ptr %dst, i64 %iv
+ store i8 0, ptr %gep, align 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @header_exit_cannot_compute_btc_due_to_step(ptr %dst, i64 %step) {
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %iv.next = add i64 %iv, %step
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %loop.latch, label %exit
+
+loop.latch:
+ %gep = getelementptr i8, ptr %dst, i64 %iv
+ store i8 0, ptr %gep, align 1
+ br label %loop.header
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 72011ca7f484..7f5e0f3a77ef 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -133,11 +133,11 @@ define void @blend_chain_iv(i1 %c) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_IND]], <4 x i64> undef
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index fe5811e7e115..85b44a7076d1 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -707,6 +707,542 @@ outer.latch:
exit:
ret void
}
+
+declare void @llvm.assume(i1)
+
+; Test case for https://github.com/llvm/llvm-project/issues/121897.
+define void @scev_expand_step(i64 %x, ptr %dst) {
+; VF8UF1-LABEL: define void @scev_expand_step(
+; VF8UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
+; VF8UF1-NEXT: call void @llvm.assume(i1 [[C]])
+; VF8UF1-NEXT: [[FR:%.*]] = freeze i64 [[X]]
+; VF8UF1-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
+; VF8UF1-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
+; VF8UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 7
+; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
+; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
+; VF8UF1-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; VF8UF1-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
+; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
+; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF8UF1: [[PRED_STORE_IF]]:
+; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
+; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF8UF1: [[PRED_STORE_CONTINUE]]:
+; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
+; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF8UF1: [[PRED_STORE_IF1]]:
+; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF8UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
+; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF8UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF8UF1: [[PRED_STORE_CONTINUE2]]:
+; VF8UF1-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
+; VF8UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF8UF1: [[PRED_STORE_IF3]]:
+; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF8UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
+; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF8UF1: [[PRED_STORE_CONTINUE4]]:
+; VF8UF1-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
+; VF8UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF8UF1: [[PRED_STORE_IF5]]:
+; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
+; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF8UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF8UF1: [[PRED_STORE_CONTINUE6]]:
+; VF8UF1-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
+; VF8UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF8UF1: [[PRED_STORE_IF7]]:
+; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
+; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF8UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; VF8UF1: [[PRED_STORE_CONTINUE8]]:
+; VF8UF1-NEXT: [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
+; VF8UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF8UF1: [[PRED_STORE_IF9]]:
+; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF8UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
+; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF8UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; VF8UF1: [[PRED_STORE_CONTINUE10]]:
+; VF8UF1-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
+; VF8UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF8UF1: [[PRED_STORE_IF11]]:
+; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF8UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
+; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF8UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; VF8UF1: [[PRED_STORE_CONTINUE12]]:
+; VF8UF1-NEXT: [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
+; VF8UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF8UF1: [[PRED_STORE_IF13]]:
+; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
+; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF8UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
+; VF8UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
+; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; VF8UF1: [[PRED_STORE_CONTINUE14]]:
+; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP:.*]]
+; VF8UF1: [[LOOP]]:
+; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
+; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
+; VF8UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1
+; VF8UF1-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
+; VF8UF1-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: ret void
+;
+; VF8UF2-LABEL: define void @scev_expand_step(
+; VF8UF2-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
+; VF8UF2-NEXT: call void @llvm.assume(i1 [[C]])
+; VF8UF2-NEXT: [[FR:%.*]] = freeze i64 [[X]]
+; VF8UF2-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
+; VF8UF2-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
+; VF8UF2-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15
+; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
+; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
+; VF8UF2-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; VF8UF2-NEXT: [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
+; VF8UF2-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
+; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
+; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF8UF2: [[PRED_STORE_IF]]:
+; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
+; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
+; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
+; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF8UF2: [[PRED_STORE_CONTINUE]]:
+; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
+; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF8UF2: [[PRED_STORE_IF1]]:
+; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[STEP]]
+; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
+; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
+; VF8UF2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP14]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF8UF2: [[PRED_STORE_CONTINUE2]]:
+; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
+; VF8UF2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF8UF2: [[PRED_STORE_IF3]]:
+; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[STEP]]
+; VF8UF2-NEXT: [[TMP17:%.*]] = add i64 0, [[TMP16]]
+; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
+; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP19]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF8UF2: [[PRED_STORE_CONTINUE4]]:
+; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
+; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF8UF2: [[PRED_STORE_IF5]]:
+; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[STEP]]
+; VF8UF2-NEXT: [[TMP22:%.*]] = add i64 0, [[TMP21]]
+; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
+; VF8UF2-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP24]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF8UF2: [[PRED_STORE_CONTINUE6]]:
+; VF8UF2-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
+; VF8UF2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF8UF2: [[PRED_STORE_IF7]]:
+; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[STEP]]
+; VF8UF2-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP26]]
+; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
+; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP29]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; VF8UF2: [[PRED_STORE_CONTINUE8]]:
+; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
+; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF8UF2: [[PRED_STORE_IF9]]:
+; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[STEP]]
+; VF8UF2-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP31]]
+; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
+; VF8UF2-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP34]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; VF8UF2: [[PRED_STORE_CONTINUE10]]:
+; VF8UF2-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
+; VF8UF2-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF8UF2: [[PRED_STORE_IF11]]:
+; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[STEP]]
+; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 0, [[TMP36]]
+; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
+; VF8UF2-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP39]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; VF8UF2: [[PRED_STORE_CONTINUE12]]:
+; VF8UF2-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
+; VF8UF2-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF8UF2: [[PRED_STORE_IF13]]:
+; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[STEP]]
+; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 0, [[TMP41]]
+; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
+; VF8UF2-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP44]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; VF8UF2: [[PRED_STORE_CONTINUE14]]:
+; VF8UF2-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
+; VF8UF2-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF8UF2: [[PRED_STORE_IF15]]:
+; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[STEP]]
+; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 0, [[TMP46]]
+; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
+; VF8UF2-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP49]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; VF8UF2: [[PRED_STORE_CONTINUE16]]:
+; VF8UF2-NEXT: [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
+; VF8UF2-NEXT: br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF8UF2: [[PRED_STORE_IF17]]:
+; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[STEP]]
+; VF8UF2-NEXT: [[TMP52:%.*]] = add i64 0, [[TMP51]]
+; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
+; VF8UF2-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP54]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; VF8UF2: [[PRED_STORE_CONTINUE18]]:
+; VF8UF2-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
+; VF8UF2-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF8UF2: [[PRED_STORE_IF19]]:
+; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[STEP]]
+; VF8UF2-NEXT: [[TMP57:%.*]] = add i64 0, [[TMP56]]
+; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
+; VF8UF2-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP59]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; VF8UF2: [[PRED_STORE_CONTINUE20]]:
+; VF8UF2-NEXT: [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
+; VF8UF2-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF8UF2: [[PRED_STORE_IF21]]:
+; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[STEP]]
+; VF8UF2-NEXT: [[TMP62:%.*]] = add i64 0, [[TMP61]]
+; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
+; VF8UF2-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP64]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; VF8UF2: [[PRED_STORE_CONTINUE22]]:
+; VF8UF2-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
+; VF8UF2-NEXT: br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF8UF2: [[PRED_STORE_IF23]]:
+; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[STEP]]
+; VF8UF2-NEXT: [[TMP67:%.*]] = add i64 0, [[TMP66]]
+; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
+; VF8UF2-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP69]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; VF8UF2: [[PRED_STORE_CONTINUE24]]:
+; VF8UF2-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
+; VF8UF2-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF8UF2: [[PRED_STORE_IF25]]:
+; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[STEP]]
+; VF8UF2-NEXT: [[TMP72:%.*]] = add i64 0, [[TMP71]]
+; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
+; VF8UF2-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP74]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; VF8UF2: [[PRED_STORE_CONTINUE26]]:
+; VF8UF2-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
+; VF8UF2-NEXT: br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF8UF2: [[PRED_STORE_IF27]]:
+; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[STEP]]
+; VF8UF2-NEXT: [[TMP77:%.*]] = add i64 0, [[TMP76]]
+; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
+; VF8UF2-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP79]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; VF8UF2: [[PRED_STORE_CONTINUE28]]:
+; VF8UF2-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
+; VF8UF2-NEXT: br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF8UF2: [[PRED_STORE_IF29]]:
+; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]]
+; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]]
+; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
+; VF8UF2-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
+; VF8UF2-NEXT: store i8 0, ptr [[TMP84]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; VF8UF2: [[PRED_STORE_CONTINUE30]]:
+; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP:.*]]
+; VF8UF2: [[LOOP]]:
+; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
+; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
+; VF8UF2-NEXT: store i8 0, ptr [[GEP_DST]], align 1
+; VF8UF2-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
+; VF8UF2-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: ret void
+;
+; VF16UF1-LABEL: define void @scev_expand_step(
+; VF16UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: [[C:%.*]] = icmp eq i64 [[X]], 65536
+; VF16UF1-NEXT: call void @llvm.assume(i1 [[C]])
+; VF16UF1-NEXT: [[FR:%.*]] = freeze i64 [[X]]
+; VF16UF1-NEXT: [[STEP:%.*]] = add i64 [[FR]], -65534
+; VF16UF1-NEXT: [[TMP0:%.*]] = udiv i64 15, [[STEP]]
+; VF16UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15
+; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
+; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
+; VF16UF1-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
+; VF16UF1-NEXT: [[TMP3:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
+; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
+; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF16UF1: [[PRED_STORE_IF]]:
+; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
+; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF16UF1: [[PRED_STORE_CONTINUE]]:
+; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
+; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF16UF1: [[PRED_STORE_IF1]]:
+; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF16UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
+; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF16UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF16UF1: [[PRED_STORE_CONTINUE2]]:
+; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
+; VF16UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF16UF1: [[PRED_STORE_IF3]]:
+; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF16UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
+; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF16UF1: [[PRED_STORE_CONTINUE4]]:
+; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
+; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF16UF1: [[PRED_STORE_IF5]]:
+; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF16UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
+; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF16UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF16UF1: [[PRED_STORE_CONTINUE6]]:
+; VF16UF1-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
+; VF16UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF16UF1: [[PRED_STORE_IF7]]:
+; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF16UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
+; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; VF16UF1: [[PRED_STORE_CONTINUE8]]:
+; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
+; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF16UF1: [[PRED_STORE_IF9]]:
+; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF16UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
+; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF16UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; VF16UF1: [[PRED_STORE_CONTINUE10]]:
+; VF16UF1-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
+; VF16UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF16UF1: [[PRED_STORE_IF11]]:
+; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
+; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF16UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; VF16UF1: [[PRED_STORE_CONTINUE12]]:
+; VF16UF1-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
+; VF16UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF16UF1: [[PRED_STORE_IF13]]:
+; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
+; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF16UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; VF16UF1: [[PRED_STORE_CONTINUE14]]:
+; VF16UF1-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
+; VF16UF1-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF16UF1: [[PRED_STORE_IF15]]:
+; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[STEP]]
+; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 0, [[TMP45]]
+; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
+; VF16UF1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP48]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; VF16UF1: [[PRED_STORE_CONTINUE16]]:
+; VF16UF1-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
+; VF16UF1-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF16UF1: [[PRED_STORE_IF17]]:
+; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[STEP]]
+; VF16UF1-NEXT: [[TMP51:%.*]] = add i64 0, [[TMP50]]
+; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
+; VF16UF1-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP53]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; VF16UF1: [[PRED_STORE_CONTINUE18]]:
+; VF16UF1-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
+; VF16UF1-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF16UF1: [[PRED_STORE_IF19]]:
+; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[STEP]]
+; VF16UF1-NEXT: [[TMP56:%.*]] = add i64 0, [[TMP55]]
+; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
+; VF16UF1-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP58]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; VF16UF1: [[PRED_STORE_CONTINUE20]]:
+; VF16UF1-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
+; VF16UF1-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF16UF1: [[PRED_STORE_IF21]]:
+; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[STEP]]
+; VF16UF1-NEXT: [[TMP61:%.*]] = add i64 0, [[TMP60]]
+; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
+; VF16UF1-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP63]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; VF16UF1: [[PRED_STORE_CONTINUE22]]:
+; VF16UF1-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
+; VF16UF1-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF16UF1: [[PRED_STORE_IF23]]:
+; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[STEP]]
+; VF16UF1-NEXT: [[TMP66:%.*]] = add i64 0, [[TMP65]]
+; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
+; VF16UF1-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP68]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; VF16UF1: [[PRED_STORE_CONTINUE24]]:
+; VF16UF1-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
+; VF16UF1-NEXT: br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF16UF1: [[PRED_STORE_IF25]]:
+; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[STEP]]
+; VF16UF1-NEXT: [[TMP71:%.*]] = add i64 0, [[TMP70]]
+; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
+; VF16UF1-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP73]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; VF16UF1: [[PRED_STORE_CONTINUE26]]:
+; VF16UF1-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
+; VF16UF1-NEXT: br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF16UF1: [[PRED_STORE_IF27]]:
+; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[STEP]]
+; VF16UF1-NEXT: [[TMP76:%.*]] = add i64 0, [[TMP75]]
+; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
+; VF16UF1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP78]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; VF16UF1: [[PRED_STORE_CONTINUE28]]:
+; VF16UF1-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
+; VF16UF1-NEXT: br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF16UF1: [[PRED_STORE_IF29]]:
+; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]]
+; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]]
+; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
+; VF16UF1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
+; VF16UF1-NEXT: store i8 0, ptr [[TMP83]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; VF16UF1: [[PRED_STORE_CONTINUE30]]:
+; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP:.*]]
+; VF16UF1: [[LOOP]]:
+; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
+; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
+; VF16UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1
+; VF16UF1-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
+; VF16UF1-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: ret void
+;
+entry:
+ %c = icmp eq i64 %x, 65536
+ call void @llvm.assume(i1 %c)
+ %fr = freeze i64 %x
+ %step = add i64 %fr, -65534
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i64 %iv, %step
+ %gep.dst = getelementptr i8, ptr %dst, i64 %iv.next
+ store i8 0, ptr %gep.dst, align 1
+ %ec = icmp slt i64 %iv.next, 16
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
+
;.
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -715,16 +1251,19 @@ exit:
; VF8UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
+; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
;.
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
;.