; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN %s

; The insertelements in the exit block use the various parts of the vectorized tree. These external uses are just creating an identity vector using a sequence
; of insert elements. Since these insertelements are just recreating the same vectors that were produced during vectorization, they should not increase the cost of vectorization.

; Shape of the input function below:
;   * entry   - sixteen scalar i16 loads from %inptr0 (element indices 0..15).
;   * do.body - sixteen scalar i16 phis merging the entry loads with a second
;               set of loads from the same sixteen addresses; the block
;               branches to exit when %flag is zero and back to itself otherwise.
;   * exit    - three 16-element insertelement chains (built from the loop
;               loads, the entry loads and the phis) stored to %out1, %out and
;               %out2 respectively.
; %inptr1 is declared but never referenced in the body.
;
; The autogenerated assertions expect each group of sixteen scalars to become
; eight <2 x i16> values, and the insertelement chains to become shufflevector
; sequences feeding the three <16 x i16> stores.
;
; NOTE(review): every shufflevector assertion below stops at the mask type
; (<16 x i32>) and carries no mask constant. Presumably the mask operands were
; dropped at some point; confirm by regenerating the assertions with
; utils/update_test_checks.py.

define void @phi_4(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, ptr %out2, i32 %flag) {
; GCN-LABEL: define void @phi_4(
; GCN-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], ptr [[OUT2:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
; GCN-NEXT: [[ENTRY:.*]]:
; GCN-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
; GCN-NEXT: [[GEP2:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 2
; GCN-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
; GCN-NEXT: [[GEP4:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 4
; GCN-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
; GCN-NEXT: [[GEP6:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 6
; GCN-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
; GCN-NEXT: [[GEP8:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 8
; GCN-NEXT: [[TMP4:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
; GCN-NEXT: [[GEP10:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 10
; GCN-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
; GCN-NEXT: [[GEP12:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 12
; GCN-NEXT: [[TMP6:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
; GCN-NEXT: [[GEP14:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 14
; GCN-NEXT: [[TMP7:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
; GCN-NEXT: br label %[[DO_BODY:.*]]
; GCN: [[DO_BODY]]:
; GCN-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP16:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP17:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP18:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP19:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP20:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP21:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP22:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP42:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP23:%.*]], %[[DO_BODY]] ]
; GCN-NEXT: [[TMP16]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
; GCN-NEXT: [[TMP17]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
; GCN-NEXT: [[TMP18]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
; GCN-NEXT: [[TMP19]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
; GCN-NEXT: [[TMP20]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
; GCN-NEXT: [[TMP21]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
; GCN-NEXT: [[TMP22]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
; GCN-NEXT: [[TMP23]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
; GCN-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
; GCN-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
; GCN: [[EXIT]]:
; GCN-NEXT: [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP16]], <2 x i16> [[TMP17]], <16 x i32>
; GCN-NEXT: [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP18]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP24]], <16 x i16> [[TMP25]], <16 x i32>
; GCN-NEXT: [[TMP27:%.*]] = shufflevector <2 x i16> [[TMP19]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP28:%.*]] = shufflevector <16 x i16> [[TMP26]], <16 x i16> [[TMP27]], <16 x i32>
; GCN-NEXT: [[TMP29:%.*]] = shufflevector <2 x i16> [[TMP20]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP30:%.*]] = shufflevector <16 x i16> [[TMP28]], <16 x i16> [[TMP29]], <16 x i32>
; GCN-NEXT: [[TMP31:%.*]] = shufflevector <2 x i16> [[TMP21]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP32:%.*]] = shufflevector <16 x i16> [[TMP30]], <16 x i16> [[TMP31]], <16 x i32>
; GCN-NEXT: [[TMP33:%.*]] = shufflevector <2 x i16> [[TMP22]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP47:%.*]] = shufflevector <16 x i16> [[TMP32]], <16 x i16> [[TMP33]], <16 x i32>
; GCN-NEXT: [[TMP48:%.*]] = shufflevector <2 x i16> [[TMP23]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP49:%.*]] = shufflevector <16 x i16> [[TMP47]], <16 x i16> [[TMP48]], <16 x i32>
; GCN-NEXT: [[TMP37:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <16 x i32>
; GCN-NEXT: [[TMP38:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP39:%.*]] = shufflevector <16 x i16> [[TMP37]], <16 x i16> [[TMP38]], <16 x i32>
; GCN-NEXT: [[TMP40:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP41:%.*]] = shufflevector <16 x i16> [[TMP39]], <16 x i16> [[TMP40]], <16 x i32>
; GCN-NEXT: [[TMP57:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP43:%.*]] = shufflevector <16 x i16> [[TMP41]], <16 x i16> [[TMP57]], <16 x i32>
; GCN-NEXT: [[TMP44:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP45:%.*]] = shufflevector <16 x i16> [[TMP43]], <16 x i16> [[TMP44]], <16 x i32>
; GCN-NEXT: [[TMP46:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP58:%.*]] = shufflevector <16 x i16> [[TMP45]], <16 x i16> [[TMP46]], <16 x i32>
; GCN-NEXT: [[TMP60:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC2157:%.*]] = shufflevector <16 x i16> [[TMP58]], <16 x i16> [[TMP60]], <16 x i32>
; GCN-NEXT: [[TMP50:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[TMP51:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC231:%.*]] = shufflevector <16 x i16> [[TMP50]], <16 x i16> [[TMP51]], <16 x i32>
; GCN-NEXT: [[TMP52:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC252:%.*]] = shufflevector <16 x i16> [[VEC231]], <16 x i16> [[TMP52]], <16 x i32>
; GCN-NEXT: [[TMP53:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC273:%.*]] = shufflevector <16 x i16> [[VEC252]], <16 x i16> [[TMP53]], <16 x i32>
; GCN-NEXT: [[TMP54:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC294:%.*]] = shufflevector <16 x i16> [[VEC273]], <16 x i16> [[TMP54]], <16 x i32>
; GCN-NEXT: [[TMP55:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC2115:%.*]] = shufflevector <16 x i16> [[VEC294]], <16 x i16> [[TMP55]], <16 x i32>
; GCN-NEXT: [[TMP56:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC2136:%.*]] = shufflevector <16 x i16> [[VEC2115]], <16 x i16> [[TMP56]], <16 x i32>
; GCN-NEXT: [[TMP59:%.*]] = shufflevector <2 x i16> [[TMP42]], <2 x i16> poison, <16 x i32>
; GCN-NEXT: [[VEC2151:%.*]] = shufflevector <16 x i16> [[VEC2136]], <16 x i16> [[TMP59]], <16 x i32>
; GCN-NEXT: store <16 x i16> [[VEC2157]], ptr [[OUT]], align 32
; GCN-NEXT: store <16 x i16> [[TMP49]], ptr [[OUT1]], align 32
; GCN-NEXT: store <16 x i16> [[VEC2151]], ptr [[OUT2]], align 32
; GCN-NEXT: ret void
;
; Sixteen scalar i16 loads from %inptr0 at element indices 0..15. The declared
; alignments repeat in the pattern 8, 1, 2, 1 across the sequence.
entry:
  %ele0 = load i16, ptr addrspace(3) %inptr0, align 8
  %gep1 = getelementptr i16, ptr addrspace(3) %inptr0, i32 1
  %ele1 = load i16, ptr addrspace(3) %gep1, align 1
  %gep2 = getelementptr i16, ptr addrspace(3) %inptr0, i32 2
  %ele2 = load i16, ptr addrspace(3) %gep2, align 2
  %gep3 = getelementptr i16, ptr addrspace(3) %inptr0, i32 3
  %ele3 = load i16, ptr addrspace(3) %gep3, align 1
  %gep4 = getelementptr i16, ptr addrspace(3) %inptr0, i32 4
  %ele4 = load i16, ptr addrspace(3) %gep4, align 8
  %gep5 = getelementptr i16, ptr addrspace(3) %inptr0, i32 5
  %ele5 = load i16, ptr addrspace(3) %gep5, align 1
  %gep6 = getelementptr i16, ptr addrspace(3) %inptr0, i32 6
  %ele6 = load i16, ptr addrspace(3) %gep6, align 2
  %gep7 = getelementptr i16, ptr addrspace(3) %inptr0, i32 7
  %ele7 = load i16, ptr addrspace(3) %gep7, align 1
  %gep8 = getelementptr i16, ptr addrspace(3) %inptr0, i32 8
  %ele8 = load i16, ptr addrspace(3) %gep8, align 8
  %gep9 = getelementptr i16, ptr addrspace(3) %inptr0, i32 9
  %ele9 = load i16, ptr addrspace(3) %gep9, align 1
  %gep10 = getelementptr i16, ptr addrspace(3) %inptr0, i32 10
  %ele10 = load i16, ptr addrspace(3) %gep10, align 2
  %gep11 = getelementptr i16, ptr addrspace(3) %inptr0, i32 11
  %ele11 = load i16, ptr addrspace(3) %gep11, align 1
  %gep12 = getelementptr i16, ptr addrspace(3) %inptr0, i32 12
  %ele12 = load i16, ptr addrspace(3) %gep12, align 8
  %gep13 = getelementptr i16, ptr addrspace(3) %inptr0, i32 13
  %ele13 = load i16, ptr addrspace(3) %gep13, align 1
  %gep14 = getelementptr i16, ptr addrspace(3) %inptr0, i32 14
  %ele14 = load i16, ptr addrspace(3) %gep14, align 2
  %gep15 = getelementptr i16, ptr addrspace(3) %inptr0, i32 15
  %ele15 = load i16, ptr addrspace(3) %gep15, align 1
  br label %do.body

; Each %phiN takes %eleN when arriving from entry and %othereleN on the back
; edge. The %othereleN loads re-read the same sixteen addresses used in entry.
do.body:
  %phi0 = phi i16 [ %ele0, %entry ], [ %otherele0, %do.body ]
  %phi1 = phi i16 [ %ele1, %entry ], [ %otherele1, %do.body ]
  %phi2 = phi i16 [ %ele2, %entry ], [ %otherele2, %do.body ]
  %phi3 = phi i16 [ %ele3, %entry ], [ %otherele3, %do.body ]
  %phi4 = phi i16 [ %ele4, %entry ], [ %otherele4, %do.body ]
  %phi5 = phi i16 [ %ele5, %entry ], [ %otherele5, %do.body ]
  %phi6 = phi i16 [ %ele6, %entry ], [ %otherele6, %do.body ]
  %phi7 = phi i16 [ %ele7, %entry ], [ %otherele7, %do.body ]
  %phi8 = phi i16 [ %ele8, %entry ], [ %otherele8, %do.body ]
  %phi9 = phi i16 [ %ele9, %entry ], [ %otherele9, %do.body ]
  %phi10 = phi i16 [ %ele10, %entry ], [ %otherele10, %do.body ]
  %phi11 = phi i16 [ %ele11, %entry ], [ %otherele11, %do.body ]
  %phi12 = phi i16 [ %ele12, %entry ], [ %otherele12, %do.body ]
  %phi13 = phi i16 [ %ele13, %entry ], [ %otherele13, %do.body ]
  %phi14 = phi i16 [ %ele14, %entry ], [ %otherele14, %do.body ]
  %phi15 = phi i16 [ %ele15, %entry ], [ %otherele15, %do.body ]
  %otherele0 = load i16, ptr addrspace(3) %inptr0, align 8
  %otherele1 = load i16, ptr addrspace(3) %gep1, align 1
  %otherele2 = load i16, ptr addrspace(3) %gep2, align 2
  %otherele3 = load i16, ptr addrspace(3) %gep3, align 1
  %otherele4 = load i16, ptr addrspace(3) %gep4, align 8
  %otherele5 = load i16, ptr addrspace(3) %gep5, align 1
  %otherele6 = load i16, ptr addrspace(3) %gep6, align 2
  %otherele7 = load i16, ptr addrspace(3) %gep7, align 1
  %otherele8 = load i16, ptr addrspace(3) %gep8, align 8
  %otherele9 = load i16, ptr addrspace(3) %gep9, align 1
  %otherele10 = load i16, ptr addrspace(3) %gep10, align 2
  %otherele11 = load i16, ptr addrspace(3) %gep11, align 1
  %otherele12 = load i16, ptr addrspace(3) %gep12, align 8
  %otherele13 = load i16, ptr addrspace(3) %gep13, align 1
  %otherele14 = load i16, ptr addrspace(3) %gep14, align 2
  %otherele15 = load i16, ptr addrspace(3) %gep15, align 1
  ; Leave the loop when %flag is zero; otherwise branch back to do.body.
  %cmp = icmp eq i32 %flag, 0
  br i1 %cmp, label %exit, label %do.body

; Three insertelement chains, each placing scalar N into lane N of a
; <16 x i16> that starts from poison. These are the external uses described in
; the header comment.
exit:
  ; Chain 0: the loop-body loads (%othereleN); the result is stored to %out1.
  %vec00 = insertelement <16 x i16> poison, i16 %otherele0, i64 0
  %vec01 = insertelement <16 x i16> %vec00, i16 %otherele1, i64 1
  %vec02 = insertelement <16 x i16> %vec01, i16 %otherele2, i64 2
  %vec03 = insertelement <16 x i16> %vec02, i16 %otherele3, i64 3
  %vec04 = insertelement <16 x i16> %vec03, i16 %otherele4, i64 4
  %vec05 = insertelement <16 x i16> %vec04, i16 %otherele5, i64 5
  %vec06 = insertelement <16 x i16> %vec05, i16 %otherele6, i64 6
  %vec07 = insertelement <16 x i16> %vec06, i16 %otherele7, i64 7
  %vec08 = insertelement <16 x i16> %vec07, i16 %otherele8, i64 8
  %vec09 = insertelement <16 x i16> %vec08, i16 %otherele9, i64 9
  %vec010 = insertelement <16 x i16> %vec09, i16 %otherele10, i64 10
  %vec011 = insertelement <16 x i16> %vec010, i16 %otherele11, i64 11
  %vec012 = insertelement <16 x i16> %vec011, i16 %otherele12, i64 12
  %vec013 = insertelement <16 x i16> %vec012, i16 %otherele13, i64 13
  %vec014 = insertelement <16 x i16> %vec013, i16 %otherele14, i64 14
  %vec015 = insertelement <16 x i16> %vec014, i16 %otherele15, i64 15
  ; Chain 1: the entry-block loads (%eleN); the result is stored to %out.
  %vec10 = insertelement <16 x i16> poison, i16 %ele0, i64 0
  %vec11 = insertelement <16 x i16> %vec10, i16 %ele1, i64 1
  %vec12 = insertelement <16 x i16> %vec11, i16 %ele2, i64 2
  %vec13 = insertelement <16 x i16> %vec12, i16 %ele3, i64 3
  %vec14 = insertelement <16 x i16> %vec13, i16 %ele4, i64 4
  %vec15 = insertelement <16 x i16> %vec14, i16 %ele5, i64 5
  %vec16 = insertelement <16 x i16> %vec15, i16 %ele6, i64 6
  %vec17 = insertelement <16 x i16> %vec16, i16 %ele7, i64 7
  %vec18 = insertelement <16 x i16> %vec17, i16 %ele8, i64 8
  %vec19 = insertelement <16 x i16> %vec18, i16 %ele9, i64 9
  %vec110 = insertelement <16 x i16> %vec19, i16 %ele10, i64 10
  %vec111 = insertelement <16 x i16> %vec110, i16 %ele11, i64 11
  %vec112 = insertelement <16 x i16> %vec111, i16 %ele12, i64 12
  %vec113 = insertelement <16 x i16> %vec112, i16 %ele13, i64 13
  %vec114 = insertelement <16 x i16> %vec113, i16 %ele14, i64 14
  %vec115 = insertelement <16 x i16> %vec114, i16 %ele15, i64 15
  ; Chain 2: the phi values (%phiN); the result is stored to %out2.
  %vec20 = insertelement <16 x i16> poison, i16 %phi0, i64 0
  %vec21 = insertelement <16 x i16> %vec20, i16 %phi1, i64 1
  %vec22 = insertelement <16 x i16> %vec21, i16 %phi2, i64 2
  %vec23 = insertelement <16 x i16> %vec22, i16 %phi3, i64 3
  %vec24 = insertelement <16 x i16> %vec23, i16 %phi4, i64 4
  %vec25 = insertelement <16 x i16> %vec24, i16 %phi5, i64 5
  %vec26 = insertelement <16 x i16> %vec25, i16 %phi6, i64 6
  %vec27 = insertelement <16 x i16> %vec26, i16 %phi7, i64 7
  %vec28 = insertelement <16 x i16> %vec27, i16 %phi8, i64 8
  %vec29 = insertelement <16 x i16> %vec28, i16 %phi9, i64 9
  %vec210 = insertelement <16 x i16> %vec29, i16 %phi10, i64 10
  %vec211 = insertelement <16 x i16> %vec210, i16 %phi11, i64 11
  %vec212 = insertelement <16 x i16> %vec211, i16 %phi12, i64 12
  %vec213 = insertelement <16 x i16> %vec212, i16 %phi13, i64 13
  %vec214 = insertelement <16 x i16> %vec213, i16 %phi14, i64 14
  %vec215 = insertelement <16 x i16> %vec214, i16 %phi15, i64 15
  store <16 x i16> %vec115, ptr %out
  store <16 x i16> %vec015, ptr %out1
  store <16 x i16> %vec215, ptr %out2
  ret void
}