diff options
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/ARM')
9 files changed, 112 insertions, 390 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll index e83ac2eed2d4..58a24ee7c467 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll @@ -36,18 +36,6 @@ define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP10]], 3 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[MUL]], ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[FOR_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: @@ -81,7 +69,4 @@ attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.vectorize.width", i32 16} -; CHECK: [[META5]] = !{!"llvm.loop.interleave.count", i32 2} ;. diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index e52d85c51ab7..9a76019ec5f4 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -25,21 +25,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; @@ -212,21 +198,7 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 3 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; @@ -273,21 +245,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 4 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index 4cdfcf2c87b9..0a4ed7ff2eb3 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -22,11 +22,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -75,11 +71,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP8]] ; @@ -126,11 +118,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] @@ -177,11 +165,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -228,11 +212,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -279,11 +259,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -330,11 +306,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] @@ -381,11 +353,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index fc7922762a0b..029d8bd64fe5 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -34,28 +34,11 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[RES_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[RES_0_LCSSA]] ; entry: @@ -112,28 +95,11 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[RES_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[RES_0_LCSSA]] ; entry: @@ -183,25 +149,13 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -245,26 +199,14 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 1, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = mul nsw i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -308,26 +250,14 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ -1, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = and i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -371,26 +301,14 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = or i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -434,26 +352,14 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = xor i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -497,26 +403,14 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret float [[R_0_LCSSA]] ; entry: @@ -560,26 +454,14 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fmul fast float [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret float [[R_0_LCSSA]] ; entry: @@ -622,7 +504,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -640,7 +522,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -689,7 +571,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -707,7 +589,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -756,7 +638,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -774,7 +656,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -823,7 +705,7 @@ define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -841,7 +723,7 @@ define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index e154883ca7ec..9dbbf4cc3534 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -45,7 +45,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) +; CHECK-NEXT: [[TMP10:%.*]] = select ninf <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 3426fb16841c..6ea075f76aed 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -30,17 +30,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; DEFAULT-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -59,17 +48,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; OPTSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -88,17 +66,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; MINSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -386,23 +353,6 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[TMP72:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8 -; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP72]] -; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP72]], 1 -; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP72]], 2 -; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; DEFAULT-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -502,23 +452,6 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -545,23 +478,6 @@ define void @dont_vectorize_with_minsize() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -588,23 +504,6 @@ define void @dont_vectorize_with_minsize() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -659,23 +558,6 @@ define void @vectorization_forced() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -702,23 +584,6 @@ define void @vectorization_forced() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -745,23 +610,6 @@ define void @vectorization_forced() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll new file mode 100644 index 000000000000..fd83a012541b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -p loop-vectorize -S %s | FileCheck %s + +target triple = "armv7-unknown-linux-gnueabihf" + +define void @replicating_load_used_by_other_load(i32 %arg, ptr %a, i32 %b) { +; CHECK-LABEL: define void @replicating_load_used_by_other_load( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[ARG]], %[[ENTRY]] ] +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[IV]], 1 +; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[IV]], 1 +; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 [[IV]], 2 +; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 [[IV]], 1 +; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], 2 +; CHECK-NEXT: [[OR_1:%.*]] = or i32 [[AND_2]], [[AND_1]] +; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[OR_1]], [[SHL_1]] +; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[B]], [[OR_2]] +; CHECK-NEXT: [[XOR_2:%.*]] = xor i32 [[XOR_1]], [[ARG]] +; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[SHL_1]], 1 +; CHECK-NEXT: [[XOR_3:%.*]] = xor i32 [[SHR]], [[ARG]] +; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[XOR_3]], 1 +; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[IV]], 2147483646 +; CHECK-NEXT: [[OR_3:%.*]] = or i32 [[AND_3]], [[AND_4]] +; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[IV]], 254 +; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 [[OR_3]], 1 +; CHECK-NEXT: [[XOR_4:%.*]] = xor i32 [[SHL_3]], 2 +; CHECK-NEXT: [[OR_4:%.*]] = or i32 [[AND_5]], [[XOR_4]] +; CHECK-NEXT: [[XOR_5:%.*]] = xor i32 [[SHR_2]], [[OR_4]] +; CHECK-NEXT: [[XOR_6:%.*]] = xor i32 [[XOR_5]], [[XOR_2]] +; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[XOR_6]], 255 +; CHECK-NEXT: [[XOR_7:%.*]] = xor i32 [[AND_6]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[XOR_7]] +; CHECK-NEXT: [[LD:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD]] to i32 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i32, ptr null, i32 [[ZEXT]] +; CHECK-NEXT: store i32 0, ptr [[GEP_2]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %arg, %entry ] + %shr = lshr i32 %iv, 1 + %and.1 = and i32 %iv, 1 + %shl.1 = shl i32 %iv, 2 + %shl.2 = shl i32 %iv, 1 + %and.2 = and i32 %shl.2, 2 + %or.1 = or i32 %and.2, %and.1 + %or.2 = or i32 %or.1, %shl.1 + %xor.1 = xor i32 %b, %or.2 + %xor.2 = xor i32 %xor.1, %arg + %shr.2 = lshr i32 %shl.1, 1 + %xor.3 = xor i32 %shr, %arg + %and.3 = and i32 %xor.3, 1 + %and.4 = and i32 %iv, 2147483646 + %or.3 = or i32 %and.3, %and.4 + %and.5 = and i32 %iv, 254 + %shl.3 = shl i32 %or.3, 1 + %xor.4 = xor i32 %shl.3, 2 + %or.4 = or i32 %and.5, %xor.4 + %xor.5 = xor i32 %shr.2, %or.4 + %xor.6 = xor i32 %xor.5, %xor.2 + %and.6 = and i32 %xor.6, 255 + %xor.7 = xor i32 %and.6, 1 + %gep = getelementptr i8, ptr %a, i32 %xor.7 + %ld = load i8, ptr %gep, align 1 + %zext = zext i8 %ld to i32 + %gep.2 = getelementptr i32, ptr null, i32 %zext + store i32 0, ptr %gep.2, align 4 + %iv.next = add i32 %iv, 1 + %cmp = icmp eq i32 %iv.next, 100 + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll index 596e42e9f094..d0c11946c9de 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll @@ -36,7 +36,7 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %exit, label %for.body } -define i32 @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { +define void @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { ; CHECK-COST-2: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.032 @@ -70,7 +70,7 @@ for.cond.cleanup.loopexit: ; preds = %if.end br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 undef + ret void for.body: ; preds = %for.body.preheader, %if.end %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll index 625f7a643a3a..1ae71c869540 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll @@ -52,7 +52,7 @@ define dso_local void @predicate_loop_hint(ptr noalias nocapture %A, ptr noalias ; CHECK: %index.next = add nuw i64 %index, 4 ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP2:![0-9]+]] ; -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP2:![0-9]+]] +; CHECK-NOT: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop entry: br label %for.body @@ -78,9 +78,6 @@ for.body: ; CHECK-NEXT: [[MD_RT_UNROLL_DIS]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK-NEXT: [[SCALAR_LOOP1]] = distinct !{[[SCALAR_LOOP1]], [[MD_RT_UNROLL_DIS]], [[MD_IS_VEC]]} ; CHECK-NEXT: [[VEC_LOOP2]] = distinct !{[[VEC_LOOP2]], [[MD_IS_VEC]], [[MD_RT_UNROLL_DIS]]} -; CHECK-NEXT: [[SCALAR_LOOP2]] = distinct !{[[SCALAR_LOOP2]], [[ORIG_PRED_ENABLED:!.+]], [[ORIG_VEC_ENABLED:!.+]]} -; CHECK-NEXT: [[ORIG_PRED_ENABLED]] = !{!"llvm.loop.vectorize.predicate.enable", i1 true} -; CHECK-NEXT: [[ORIG_VEC_ENABLED]] = !{!"llvm.loop.vectorize.enable", i1 true} !6 = distinct !{!6, !7, !8} !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} |
