diff options
Diffstat (limited to 'llvm/test/Transforms')
6 files changed, 272 insertions, 178 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 22009293b7f5..b9294a16f665 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -13,65 +13,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 1> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 2, i64 3> -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x ptr> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x ptr> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]]) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]]) -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP7]]) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 2 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10001, 10000 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 +; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[PTR_START_1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <2 x i64> <i64 0, i64 1> -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP12]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP17]], align 1 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2 -; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i64 2 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 10000 -; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX7]], 0 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX7]], 1 +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP8]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP9]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i32 0 +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 10000 +; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 10001, 10000 -; CHECK-NEXT: br i1 [[CMP_N3]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10001, 10000 +; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I_I]]) ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 718b2e868fea..fbf347f9b1e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -26,6 +26,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 @@ -45,32 +46,43 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]] ; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]] -; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 8, [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0 +; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]] +; CHECK-NEXT: [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]] +; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP23]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0 +; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]] +; CHECK-NEXT: [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0 +; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]] +; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4 +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index bb36224db150..ca27d83a3702 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -46,53 +46,65 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8*, i8** [[START_1:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8** [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP7]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP5]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP11]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP13]], i64 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to <vscale x 2 x i8*>* -; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP14]], <vscale x 2 x i8*>* [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i8*> [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <vscale x 2 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP19]], align 1 -; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP18]] to <vscale x 2 x i8>* -; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], <vscale x 2 x i8>* [[TMP21]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP9]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8*, i8** [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i32 0 +; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT6]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT7]], [[TMP17]] +; CHECK-NEXT: [[VECTOR_GEP8:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <vscale x 2 x i64> [[VECTOR_GEP8]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP19]], i64 1 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i8**> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8*, i8** [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to <vscale x 2 x i8*>* +; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP20]], <vscale x 2 x i8*>* [[TMP23]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i8*> [[TMP19]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP26]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>* +; CHECK-NEXT: store <vscale x 2 x i8> [[TMP27]], <vscale x 2 x i8>* [[TMP28]], align 1 +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8*, i8** [[POINTER_PHI]], i64 [[TMP7]] +; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8** [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP_BODY:%.*]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ] ; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi i8** [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ] -; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ] ; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds i8*, i8** [[PTR_IV_1]], i64 1 ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, i8* [[PTR_IV_2]], i64 1 ; CHECK-NEXT: store i8* [[PTR_IV_2_NEXT]], i8** [[PTR_IV_1]], align 8 @@ -144,20 +156,32 @@ define void @pointer_induction(i8* noalias %start, i64 %N) { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX2_0:%.*]] = add i64 [[INDEX2]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], i64 [[INDEX2_0]] -; CHECK-NEXT: [[NEXT_GEP_0:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[NEXT_GEP_0]] to <vscale x 2 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP16]], align 1 -; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[NEXT_GEP_0]] to <vscale x 2 x i8>* -; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], <vscale x 2 x i8>* [[TMP18]], align 1 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP15]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>* +; CHECK-NEXT: store <vscale x 2 x i8> [[TMP16]], <vscale x 2 x i8>* [[TMP17]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP19]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -169,8 +193,8 @@ define void @pointer_induction(i8* noalias %start, i64 %N) { ; CHECK-NEXT: [[PTR_PHI:%.*]] = phi i8* [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDEX_NXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = load i8, i8* [[PTR_PHI]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP24]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[PTR_PHI]], align 1 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP21]], 1 ; CHECK-NEXT: store i8 [[ADD]], i8* [[PTR_PHI]], align 1 ; CHECK-NEXT: [[PTR_PHI_NEXT]] = getelementptr inbounds i8, i8* [[PTR_PHI]], i64 1 ; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i8* [[PTR_PHI_NEXT]], [[START]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 658bcce1555c..2e6f1a0cf1ec 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -153,49 +153,55 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[SRC]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI7:%.*]] = phi i32* [ [[DST]], [[VECTOR_PH]] ], [ [[PTR_IND8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <vscale x 4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to <vscale x 4 x i32>* -; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>* -; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32>* [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[POINTER_PHI]] to <vscale x 4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD15]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[POINTER_PHI7]] to <vscale x 4 x i32>* +; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32>* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i32 [[TMP17]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <vscale x 4 x i32>* +; CHECK-NEXT: store <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32>* [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]] +; CHECK-NEXT: [[PTR_IND8]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1 +; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[S_010]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP25]], 1 ; CHECK-NEXT: store i32 [[MUL]], i32* [[D_09]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1 @@ -247,43 +253,46 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i32*, i32** [[B:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32*, i32** [[B:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i32** [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32*, i32** [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 1 ; CHECK-NEXT: [[BC:%.*]] = bitcast <vscale x 2 x i32*> [[TMP7]] to <vscale x 2 x <vscale x 2 x i32>*> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[NEXT_GEP]] to <vscale x 2 x i32*>* -; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32** [[POINTER_PHI4]] to <vscale x 2 x i32*>* +; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[PTR_IND5]] = getelementptr i32*, i32** [[POINTER_PHI4]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP11]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32** [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[VAR3:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[Q:%.*]] = phi i32** [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[Q:%.*]] = phi i32** [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[VAR2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[VAR1:%.*]] = load i32, i32* [[P]], align 8 ; CHECK-NEXT: [[VAR2]] = add i32 [[VAR1]], [[VAR0]] @@ -294,7 +303,7 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR5]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll index 5018be175975..022912f3b855 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -7,19 +8,54 @@ target triple = "x86_64-linux-gnu" define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { ; CHECK-LABEL: @test_pr55375_interleave_opaque_ptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64 +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ %start, %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 16> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP11]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> -; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec -; CHECK-NEXT: br i1 [[TMP13]], label %middle.block, label %vector.body +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[IV]], i64 0, i32 1 +; CHECK-NEXT: store ptr [[IV]], ptr [[IV_1]], align 8 +; CHECK-NEXT: store ptr null, ptr [[IV]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds [[PAIR]], ptr [[IV]], i64 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll index 7beb609e5ea6..5ec653531379 100644 --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -57,16 +57,20 @@ define void @store_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 8> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <2 x ptr> [[TMP10]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] |
