summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp176
1 files changed, 175 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 03b92d3338a9..0874b29ab7d2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -39,6 +39,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
+#include "llvm/Analysis/HashRecognize.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -143,6 +144,14 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Wcslen), cl::init(false),
cl::ReallyHidden);
+bool DisableLIRP::HashRecognize;
+static cl::opt<bool, true>
+ DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize",
+ cl::desc("Proceed with loop idiom recognize pass, "
+ "but do not optimize CRC loops."),
+ cl::location(DisableLIRP::HashRecognize),
+ cl::init(false), cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -242,6 +251,7 @@ private:
const SCEV *BECount);
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);
+ bool optimizeCRCLoop(const PolynomialInfo &Info);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -287,6 +297,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
+ std::optional<PolynomialInfo> HR;
+
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
AR.MSSA, DL, ORE);
if (!LIR.runOnLoop(&L))
@@ -335,7 +347,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
- if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy)
+ if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic ||
+ HasMemcpy || !DisableLIRP::HashRecognize)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
@@ -378,6 +391,13 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}
+
+ // Optimize a CRC loop if HashRecognize found one, provided we're not
+ // optimizing for size.
+ if (!DisableLIRP::HashRecognize && !ApplyCodeSizeHeuristics)
+ if (auto Res = HashRecognize(*CurLoop, *SE).getResult())
+ optimizeCRCLoop(*Res);
+
return MadeChange;
}
@@ -1514,6 +1534,160 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
return false;
}
+bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) {
+ // FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using
+ // carry-less multiplication instructions, which is more efficient than our
+ // Sarwate table-lookup optimization. Hence, until we're able to emit
+ // target-specific instructions for Hexagon, subsuming HexagonLoopIdiom,
+ // disable the optimization for Hexagon.
+ Module &M = *CurLoop->getHeader()->getModule();
+ Triple TT(M.getTargetTriple());
+ if (TT.getArch() == Triple::hexagon)
+ return false;
+
+ // First, create a new GlobalVariable corresponding to the
+ // Sarwate-lookup-table.
+ Type *CRCTy = Info.LHS->getType();
+ unsigned CRCBW = CRCTy->getIntegerBitWidth();
+ std::array<Constant *, 256> CRCConstants;
+ transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped),
+ CRCConstants.begin(),
+ [CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); });
+ Constant *ConstArray =
+ ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants);
+ GlobalVariable *GV =
+ new GlobalVariable(M, ConstArray->getType(), true,
+ GlobalValue::PrivateLinkage, ConstArray, ".crctable");
+
+ PHINode *IV = CurLoop->getCanonicalInductionVariable();
+ SmallVector<PHINode *, 2> Cleanup;
+
+ // Next, mark all PHIs for removal except IV.
+ {
+ for (PHINode &PN : CurLoop->getHeader()->phis()) {
+ if (&PN == IV)
+ continue;
+ PN.replaceAllUsesWith(PoisonValue::get(PN.getType()));
+ Cleanup.push_back(&PN);
+ }
+ }
+
+ // Next, fix up the trip count.
+ {
+ unsigned NewBTC = (Info.TripCount / 8) - 1;
+ BasicBlock *LoopBlk = CurLoop->getLoopLatch();
+ BranchInst *BrInst = cast<BranchInst>(LoopBlk->getTerminator());
+ CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk
+ ? ICmpInst::Predicate::ICMP_NE
+ : ICmpInst::Predicate::ICMP_EQ;
+ Instruction *ExitCond = CurLoop->getLatchCmpInst();
+ Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC);
+ IRBuilder<> Builder(ExitCond);
+ Value *NewExitCond =
+ Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond");
+ ExitCond->replaceAllUsesWith(NewExitCond);
+ deleteDeadInstruction(ExitCond);
+ }
+
+ // Finally, fill the loop with the Sarwate-table-lookup logic, and replace all
+ // uses of ComputedValue.
+ //
+ // Little-endian:
+ // crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)]
+ // Big-Endian:
+ // crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)]
+ {
+ auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) {
+ Type *OpTy = Op->getType();
+ unsigned OpBW = OpTy->getIntegerBitWidth();
+ return OpBW > 8
+ ? Builder.CreateAnd(Op, ConstantInt::get(OpTy, 0XFF), Name)
+ : Op;
+ };
+ auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op,
+ const Twine &Name) {
+ Type *OpTy = Op->getType();
+
+ // When the bitwidth of the CRC mismatches the Op's bitwidth, we need to
+ // use the CRC's bitwidth as the reference for shifting right.
+ return LoByte(Builder,
+ CRCBW > 8 ? Builder.CreateLShr(
+ Op, ConstantInt::get(OpTy, CRCBW - 8), Name)
+ : Op,
+ Name + ".lo.byte");
+ };
+
+ IRBuilder<> Builder(CurLoop->getHeader(),
+ CurLoop->getHeader()->getFirstNonPHIIt());
+
+ // Create the CRC PHI, and initialize its incoming value to the initial
+ // value of CRC.
+ PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc");
+ CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader());
+
+ // CRC is now an evolving variable, initialized to the PHI.
+ Value *CRC = CRCPhi;
+
+ // TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte
+ // of LHSAux), if LHSAux is non-nullptr.
+ Value *Indexer = CRC;
+ if (Value *Data = Info.LHSAux) {
+ Type *DataTy = Data->getType();
+
+ // To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we
+ // shift right by that amount, and take the lo-byte (in the little-endian
+ // case), or shift left by that amount, and take the hi-idx (in the
+ // big-endian case).
+ Value *IVBits = Builder.CreateZExtOrTrunc(
+ Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer");
+ Value *DataIndexer =
+ Info.ByteOrderSwapped
+ ? Builder.CreateShl(Data, IVBits, "data.indexer")
+ : Builder.CreateLShr(Data, IVBits, "data.indexer");
+ Indexer = Builder.CreateXor(
+ DataIndexer,
+ Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"),
+ "crc.data.indexer");
+ }
+
+ Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi")
+ : LoByte(Builder, Indexer, "indexer.lo");
+
+ // Always index into a GEP using the index type.
+ Indexer = Builder.CreateZExt(
+ Indexer, SE->getDataLayout().getIndexType(GV->getType()),
+ "indexer.ext");
+
+ // CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC].
+ Value *CRCTableGEP =
+ Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd");
+ Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld");
+
+ // CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of
+ // CRC-8.
+ Value *CRCNext = CRCTableLd;
+ if (CRCBW > 8) {
+ Value *CRCShift = Info.ByteOrderSwapped
+ ? Builder.CreateShl(CRC, 8, "crc.be.shift")
+ : Builder.CreateLShr(CRC, 8, "crc.le.shift");
+ CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next");
+ }
+
+ // Connect the back-edge for the loop, and RAUW the ComputedValue.
+ CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch());
+ Info.ComputedValue->replaceUsesOutsideBlock(CRCNext,
+ CurLoop->getLoopLatch());
+ }
+
+ // Cleanup.
+ {
+ for (PHINode *PN : Cleanup)
+ RecursivelyDeleteDeadPHINode(PN);
+ SE->forgetLoop(CurLoop);
+ }
+ return true;
+}
+
bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()