diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils')
| -rw-r--r-- | llvm/lib/Transforms/Utils/CodeLayout.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/Debugify.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/IRNormalizer.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/Local.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 20 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnroll.cpp | 138 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopVersioning.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/ProfileVerify.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp | 105 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 39 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 359 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 113 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SymbolRewriter.cpp | 72 |
16 files changed, 576 insertions, 308 deletions
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index c76b3afef50c..27b13eeaf4d7 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -1285,7 +1285,7 @@ private: // Cache misses on the merged chain double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount; double MergedSize = ChainPred->Size + ChainSucc->Size; - double MergedDensity = static_cast<double>(MergedCounts) / MergedSize; + double MergedDensity = MergedCounts / MergedSize; double NewScore = MergedCounts * missProbability(MergedDensity); return CurScore - NewScore; diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 7063cde5263b..5a09b7385f2b 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -254,7 +254,6 @@ bool llvm::applyDebugifyMetadata( } if (ApplyToMF) ApplyToMF(DIB, F); - DIB.finalizeSubprogram(SP); } DIB.finalize(); diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 3bbe875bbe9e..1a9e16be6989 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -13,6 +13,8 @@ #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/TimeProfiler.h" + using namespace llvm; /// Uses the "source_filename" instead of a Module hash ID for the suffix of @@ -370,6 +372,7 @@ void FunctionImportGlobalProcessing::run() { processGlobalsForThinLTO(); } void llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, bool ClearDSOLocalOnDeclarations, SetVector<GlobalValue *> *GlobalsToImport) { + llvm::TimeTraceScope timeScope("Rename module for ThinLTO"); FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport, ClearDSOLocalOnDeclarations); ThinLTOProcessing.run(); diff --git a/llvm/lib/Transforms/Utils/IRNormalizer.cpp b/llvm/lib/Transforms/Utils/IRNormalizer.cpp index ad91318ae474..fefa49f68c8d 100644 --- a/llvm/lib/Transforms/Utils/IRNormalizer.cpp +++ b/llvm/lib/Transforms/Utils/IRNormalizer.cpp @@ -427,7 +427,7 @@ void IRNormalizer::reorderInstructions(Function &F) const { // Process the remaining instructions. // // TODO: Do more a intelligent sorting of these instructions. For example, - // seperate between dead instructinos and instructions used in another + // separate between dead instructinos and instructions used in another // block. Use properties of the CFG the order instructions that are used // in another block. if (Visited.contains(&I)) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index ac344904f90f..2cfd70a1746c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3397,8 +3397,8 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, if (FP && Ty.isFloatingPointTy() && Ty.getScalarSizeInBits() <= 64) { const APFloat &APF = FP->getValueAPF(); APInt const &API = APF.bitcastToAPInt(); - if (auto Temp = API.getZExtValue()) - return DIB.createConstantValueExpression(static_cast<uint64_t>(Temp)); + if (uint64_t Temp = API.getZExtValue()) + return DIB.createConstantValueExpression(Temp); return DIB.createConstantValueExpression(*API.getRawData()); } @@ -3838,8 +3838,8 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { const auto *Op = I->getOperand(OpIdx); - // We can't have a PHI with a metadata type. - if (Op->getType()->isMetadataTy()) + // We can't have a PHI with a metadata or token type. + if (Op->getType()->isMetadataTy() || Op->getType()->isTokenLikeTy()) return false; // swifterror pointers can only be used by a load, store, or as a swifterror diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index ba0ac01cadd8..735bad1cb134 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -225,9 +225,9 @@ protected: // Auxiliary function to calculate the number of iterations for a comparison // instruction or a binary operator. - PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp, - const PeelCounterValue &LHS, - const PeelCounterValue &RHS) const; + PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp, + const PeelCounterValue &LHS, + const PeelCounterValue &RHS) const; // Returns true if the \p Phi is an induction in the target loop. This is a // lightweight check and possible to detect an IV in some cases. @@ -269,15 +269,13 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const { break; // Avoid infinite loop. - if (Visited.contains(Cur)) + if (!Visited.insert(Cur).second) return false; auto *I = dyn_cast<Instruction>(Cur); if (!I || !L.contains(I)) return false; - Visited.insert(Cur); - if (auto *Cast = dyn_cast<CastInst>(I)) { Cur = Cast->getOperand(0); } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) { @@ -300,14 +298,14 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const { /// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is /// considered an IV only if it is an addition or a subtraction. Otherwise the -/// result can be a value that is neither an loop-invariant nor an IV. +/// result can be a value that is neither a loop-invariant nor an IV. /// /// If both \p LHS and \p RHS are loop-invariants, then the result of /// \CmpOrBinaryOp is also a loop-invariant. PhiAnalyzer::PeelCounter -PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp, - const PeelCounterValue &LHS, - const PeelCounterValue &RHS) const { +PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp, + const PeelCounterValue &LHS, + const PeelCounterValue &RHS) const { auto &[LVal, LTy] = LHS; auto &[RVal, RTy] = RHS; unsigned NewVal = std::max(LVal, RVal); @@ -380,7 +378,7 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { if (RHS == Unknown) return Unknown; return (IterationsToInvarianceOrInduction[I] = - mergeTwoCounter(*I, *LHS, *RHS)); + mergeTwoCounters(*I, *LHS, *RHS)); } if (I->isCast()) // Cast instructions get the value of the operand. diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 86b268de43cf..b18aceaa67d7 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -108,6 +109,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, #endif ); +static cl::opt<bool> UnrollAddParallelReductions( + "unroll-add-parallel-reductions", cl::init(false), cl::Hidden, + cl::desc("Allow unrolling to add parallel reduction phis.")); /// Check if unrolling created a situation where we need to insert phi nodes to /// preserve LCSSA form. @@ -660,6 +664,41 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, OrigPHINode.push_back(cast<PHINode>(I)); } + // Collect phi nodes for reductions for which we can introduce multiple + // parallel reduction phis and compute the final reduction result after the + // loop. This requires a single exit block after unrolling. This is ensured by + // restricting to single-block loops where the unrolled iterations are known + // to not exit. + DenseMap<PHINode *, RecurrenceDescriptor> Reductions; + bool CanAddAdditionalAccumulators = + (UnrollAddParallelReductions.getNumOccurrences() > 0 + ? UnrollAddParallelReductions + : ULO.AddAdditionalAccumulators) && + !CompletelyUnroll && L->getNumBlocks() == 1 && + (ULO.Runtime || + (ExitInfos.contains(Header) && ((ExitInfos[Header].TripCount != 0 && + ExitInfos[Header].BreakoutTrip == 0)))); + + // Limit parallelizing reductions to unroll counts of 4 or less for now. + // TODO: The number of parallel reductions should depend on the number of + // execution units. We also don't have to add a parallel reduction phi per + // unrolled iteration, but could for example add a parallel phi for every 2 + // unrolled iterations. + if (CanAddAdditionalAccumulators && ULO.Count <= 4) { + for (PHINode &Phi : Header->phis()) { + auto RdxDesc = canParallelizeReductionWhenUnrolling(Phi, L, SE); + if (!RdxDesc) + continue; + + // Only handle duplicate phis for a single reduction for now. + // TODO: Handle any number of reductions + if (!Reductions.empty()) + continue; + + Reductions[&Phi] = *RdxDesc; + } + } + std::vector<BasicBlock *> Headers; std::vector<BasicBlock *> Latches; Headers.push_back(Header); @@ -710,6 +749,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // latch. This is a reasonable default placement if we don't have block // frequencies, and if we do, well the layout will be adjusted later. auto BlockInsertPt = std::next(LatchBlock->getIterator()); + SmallVector<Instruction *> PartialReductions; for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector<BasicBlock *, 8> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; @@ -733,6 +773,31 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (PHINode *OrigPHI : OrigPHINode) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + + // Use cloned phis as parallel phis for partial reductions, which will + // get combined to the final reduction result after the loop. + if (Reductions.contains(OrigPHI)) { + // Collect partial reduction results. + if (PartialReductions.empty()) + PartialReductions.push_back(cast<Instruction>(InVal)); + PartialReductions.push_back(cast<Instruction>(VMap[InVal])); + + // Update the start value for the cloned phis to use the identity + // value for the reduction. + const RecurrenceDescriptor &RdxDesc = Reductions[OrigPHI]; + NewPHI->setIncomingValueForBlock( + L->getLoopPreheader(), + getRecurrenceIdentity(RdxDesc.getRecurrenceKind(), + OrigPHI->getType(), + RdxDesc.getFastMathFlags())); + + // Update NewPHI to use the cloned value for the iteration and move + // to header. + NewPHI->replaceUsesOfWith(InVal, VMap[InVal]); + NewPHI->moveBefore(OrigPHI->getIterator()); + continue; + } + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; @@ -832,6 +897,9 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); PN->eraseFromParent(); } else if (ULO.Count > 1) { + if (Reductions.contains(PN)) + continue; + Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. @@ -1010,6 +1078,38 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } + // If there are partial reductions, create code in the exit block to compute + // the final result and update users of the final result. + if (!PartialReductions.empty()) { + BasicBlock *ExitBlock = L->getExitBlock(); + assert(ExitBlock && + "Can only introduce parallel reduction phis with single exit block"); + assert(Reductions.size() == 1 && + "currently only a single reduction is supported"); + Value *FinalRdxValue = PartialReductions.back(); + Value *RdxResult = nullptr; + for (PHINode &Phi : ExitBlock->phis()) { + if (Phi.getIncomingValueForBlock(L->getLoopLatch()) != FinalRdxValue) + continue; + if (!RdxResult) { + RdxResult = PartialReductions.front(); + IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt()); + RecurKind RK = Reductions.begin()->second.getRecurrenceKind(); + for (Instruction *RdxPart : drop_begin(PartialReductions)) { + RdxResult = Builder.CreateBinOp( + (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK), + RdxPart, RdxResult, "bin.rdx"); + } + NeedToFixLCSSA = true; + for (Instruction *RdxPart : PartialReductions) + RdxPart->dropPoisonGeneratingFlags(); + } + + Phi.replaceAllUsesWith(RdxResult); + continue; + } + } + if (DTUToUse) { // Apply updates to the DomTree. DT = &DTU.getDomTree(); @@ -1111,3 +1211,41 @@ MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { } return nullptr; } + +std::optional<RecurrenceDescriptor> +llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, + ScalarEvolution *SE) { + RecurrenceDescriptor RdxDesc; + if (!RecurrenceDescriptor::isReductionPHI(&Phi, L, RdxDesc, + /*DemandedBits=*/nullptr, + /*AC=*/nullptr, /*DT=*/nullptr, SE)) + return std::nullopt; + RecurKind RK = RdxDesc.getRecurrenceKind(); + // Skip unsupported reductions. + // TODO: Handle additional reductions, including FP and min-max + // reductions. + if (!RecurrenceDescriptor::isIntegerRecurrenceKind(RK) || + RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || + RecurrenceDescriptor::isFindIVRecurrenceKind(RK) || + RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) + return std::nullopt; + + if (RdxDesc.IntermediateStore) + return std::nullopt; + + // Don't unroll reductions with constant ops; those can be folded to a + // single induction update. + if (any_of(cast<Instruction>(Phi.getIncomingValueForBlock(L->getLoopLatch())) + ->operands(), + IsaPred<Constant>)) + return std::nullopt; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || + !is_contained( + cast<Instruction>(Phi.getIncomingValueForBlock(Latch))->operands(), + &Phi)) + return std::nullopt; + + return RdxDesc; +} diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 843364eb34f8..b172ef6ba080 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -2032,6 +2032,7 @@ Value *llvm::addRuntimeChecks( MemoryRuntimeCheck = IsConflict; } + Exp.eraseDeadInstructions(MemoryRuntimeCheck); return MemoryRuntimeCheck; } @@ -2077,6 +2078,7 @@ Value *llvm::addDiffRuntimeChecks( MemoryRuntimeCheck = IsConflict; } + Expander.eraseDeadInstructions(MemoryRuntimeCheck); return MemoryRuntimeCheck; } diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 1711163fb9f5..ec2e6c1ab796 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -81,6 +81,8 @@ void LoopVersioning::versionLoop( } else RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; + Exp.eraseDeadInstructions(SCEVRuntimeCheck); + assert(RuntimeCheck && "called even though we don't need " "any runtime checks"); diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp index 41647f7717a4..faacd422c009 100644 --- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp +++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp @@ -155,12 +155,15 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F, FunctionAnalysisManager &FAM) { const auto EntryCount = F.getEntryCount(/*AllowSynthetic=*/true); if (!EntryCount) { - F.getContext().emitError("Profile verification failed: function entry " - "count missing (set to 0 if cold)"); + auto *MD = F.getMetadata(LLVMContext::MD_prof); + if (!MD || !isExplicitlyUnknownProfileMetadata(*MD)) { + F.getContext().emitError("Profile verification failed: function entry " + "count missing (set to 0 if cold)"); + return PreservedAnalyses::all(); + } + } else if (EntryCount->getCount() == 0) { return PreservedAnalyses::all(); } - if (EntryCount->getCount() == 0) - return PreservedAnalyses::all(); for (const auto &BB : F) { if (AnnotateSelect) { for (const auto &I : BB) diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 10c162bc6463..d93a4d87f30f 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -849,9 +849,12 @@ void PromoteMem2Reg::run() { for (unsigned i = 0, e = Allocas.size(); i != e; ++i) IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType())); - // When handling debug info, treat all incoming values as if they have unknown - // locations until proven otherwise. + // When handling debug info, treat all incoming values as if they have + // compiler-generated (empty) locations, representing the uninitialized + // alloca, until proven otherwise. IncomingLocs.resize(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + IncomingLocs.init(i, DebugLoc::getCompilerGenerated()); // The renamer uses the Visited set to avoid infinite loops. Visited.resize(F.getMaxBlockNumber(), false); diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index d53a3144bf57..a814867652cd 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -21,29 +21,20 @@ using namespace llvm; -static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { +struct LookupTableInfo { + Value *Index; + SmallVector<Constant *> Ptrs; +}; + +static bool shouldConvertToRelLookupTable(LookupTableInfo &Info, Module &M, + GlobalVariable &GV) { // If lookup table has more than one user, // do not generate a relative lookup table. // This is to simplify the analysis that needs to be done for this pass. // TODO: Add support for lookup tables with multiple uses. // For ex, this can happen when a function that uses a lookup table gets // inlined into multiple call sites. - if (!GV.hasInitializer() || - !GV.isConstant() || - !GV.hasOneUse()) - return false; - - GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser()); - if (!GEP || !GEP->hasOneUse() || - GV.getValueType() != GEP->getSourceElementType()) - return false; - - LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser()); - if (!Load || !Load->hasOneUse() || - Load->getType() != GEP->getResultElementType()) - return false; - + // // If the original lookup table does not have local linkage and is // not dso_local, do not generate a relative lookup table. // This optimization creates a relative lookup table that consists of @@ -51,21 +42,40 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { // To be able to generate these offsets, relative lookup table and // its elements should have internal linkage and be dso_local, which means // that they should resolve to symbols within the same linkage unit. - if (!GV.hasLocalLinkage() || - !GV.isDSOLocal() || - !GV.isImplicitDSOLocal()) + if (!GV.hasInitializer() || !GV.isConstant() || !GV.hasOneUse() || + !GV.hasLocalLinkage() || !GV.isDSOLocal() || !GV.isImplicitDSOLocal()) return false; - ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer()); - if (!Array) + auto *GEP = dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser()); + if (!GEP || !GEP->hasOneUse()) + return false; + + auto *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser()); + if (!Load || !Load->hasOneUse()) return false; // If values are not 64-bit pointers, do not generate a relative lookup table. const DataLayout &DL = M.getDataLayout(); - Type *ElemType = Array->getType()->getElementType(); + Type *ElemType = Load->getType(); if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64) return false; + // Make sure this is a gep of the form GV + scale*var. + unsigned IndexWidth = + DL.getIndexTypeSizeInBits(Load->getPointerOperand()->getType()); + SmallMapVector<Value *, APInt, 4> VarOffsets; + APInt ConstOffset(IndexWidth, 0); + if (!GEP->collectOffset(DL, IndexWidth, VarOffsets, ConstOffset) || + !ConstOffset.isZero() || VarOffsets.size() != 1) + return false; + + // This can't be a pointer lookup table if the stride is smaller than a + // pointer. + Info.Index = VarOffsets.front().first; + const APInt &Stride = VarOffsets.front().second; + if (Stride.ult(DL.getTypeStoreSize(ElemType))) + return false; + SmallVector<GlobalVariable *, 4> GVOps; Triple TT = M.getTargetTriple(); // FIXME: This should be removed in the future. @@ -80,14 +90,20 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { // https://github.com/rust-lang/rust/issues/141306. || (TT.isX86() && TT.isOSDarwin()); - for (const Use &Op : Array->operands()) { - Constant *ConstOp = cast<Constant>(&Op); + APInt Offset(IndexWidth, 0); + uint64_t GVSize = DL.getTypeAllocSize(GV.getValueType()); + for (; Offset.ult(GVSize); Offset += Stride) { + Constant *C = + ConstantFoldLoadFromConst(GV.getInitializer(), ElemType, Offset, DL); + if (!C) + return false; + GlobalValue *GVOp; - APInt Offset; + APInt GVOffset; // If an operand is not a constant offset from a lookup table, // do not generate a relative lookup table. - if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL)) + if (!IsConstantOffsetFromGlobal(C, GVOp, GVOffset, DL)) return false; // If operand is mutable, do not generate a relative lookup table. @@ -102,6 +118,8 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { if (ShouldDropUnnamedAddr) GVOps.push_back(GlovalVarOp); + + Info.Ptrs.push_back(C); } if (ShouldDropUnnamedAddr) @@ -111,14 +129,12 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { return true; } -static GlobalVariable *createRelLookupTable(Function &Func, +static GlobalVariable *createRelLookupTable(LookupTableInfo &Info, + Function &Func, GlobalVariable &LookupTable) { Module &M = *Func.getParent(); - ConstantArray *LookupTableArr = - cast<ConstantArray>(LookupTable.getInitializer()); - unsigned NumElts = LookupTableArr->getType()->getNumElements(); ArrayType *IntArrayTy = - ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts); + ArrayType::get(Type::getInt32Ty(M.getContext()), Info.Ptrs.size()); GlobalVariable *RelLookupTable = new GlobalVariable( M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(), @@ -127,10 +143,9 @@ static GlobalVariable *createRelLookupTable(Function &Func, LookupTable.isExternallyInitialized()); uint64_t Idx = 0; - SmallVector<Constant *, 64> RelLookupTableContents(NumElts); + SmallVector<Constant *, 64> RelLookupTableContents(Info.Ptrs.size()); - for (Use &Operand : LookupTableArr->operands()) { - Constant *Element = cast<Constant>(Operand); + for (Constant *Element : Info.Ptrs) { Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); @@ -148,7 +163,8 @@ static GlobalVariable *createRelLookupTable(Function &Func, return RelLookupTable; } -static void convertToRelLookupTable(GlobalVariable &LookupTable) { +static void convertToRelLookupTable(LookupTableInfo &Info, + GlobalVariable &LookupTable) { GetElementPtrInst *GEP = cast<GetElementPtrInst>(LookupTable.use_begin()->getUser()); LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser()); @@ -159,21 +175,21 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) { Function &Func = *BB->getParent(); // Generate an array that consists of relative offsets. - GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable); + GlobalVariable *RelLookupTable = + createRelLookupTable(Info, Func, LookupTable); // Place new instruction sequence before GEP. Builder.SetInsertPoint(GEP); - Value *Index = GEP->getOperand(2); - IntegerType *IntTy = cast<IntegerType>(Index->getType()); - Value *Offset = - Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); + IntegerType *IntTy = cast<IntegerType>(Info.Index->getType()); + Value *Offset = Builder.CreateShl(Info.Index, ConstantInt::get(IntTy, 2), + "reltable.shift"); // Insert the call to load.relative intrinsic before LOAD. // GEP might not be immediately followed by a LOAD, like it can be hoisted // outside the loop or another instruction might be inserted them in between. Builder.SetInsertPoint(Load); Function *LoadRelIntrinsic = llvm::Intrinsic::getOrInsertDeclaration( - &M, Intrinsic::load_relative, {Index->getType()}); + &M, Intrinsic::load_relative, {Info.Index->getType()}); // Create a call to load.relative intrinsic that computes the target address // by adding base address (lookup table address) and relative offset. @@ -205,10 +221,11 @@ static bool convertToRelativeLookupTables( bool Changed = false; for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { - if (!shouldConvertToRelLookupTable(M, GV)) + LookupTableInfo Info; + if (!shouldConvertToRelLookupTable(Info, M, GV)) continue; - convertToRelLookupTable(GV); + convertToRelLookupTable(Info, GV); // Remove the original lookup table. GV.eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 060ca92e559a..28befd0aa1ce 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #if LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -175,6 +176,26 @@ SCEVExpander::findInsertPointAfter(Instruction *I, return IP; } +void SCEVExpander::eraseDeadInstructions(Value *Root) { + SmallVector<Value *> WorkList; + SmallPtrSet<Value *, 8> DeletedValues; + append_range(WorkList, getAllInsertedInstructions()); + while (!WorkList.empty()) { + Value *V = WorkList.pop_back_val(); + if (DeletedValues.contains(V)) + continue; + auto *I = dyn_cast<Instruction>(V); + if (!I || I == Root || !isInsertedInstruction(I) || + !isInstructionTriviallyDead(I)) + continue; + append_range(WorkList, I->operands()); + InsertedValues.erase(I); + InsertedPostIncValues.erase(I); + DeletedValues.insert(I); + I->eraseFromParent(); + } +} + BasicBlock::iterator SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const { // Cast the argument at the beginning of the entry block, after @@ -1239,10 +1260,13 @@ Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) { if (!isa<SCEVAddRecExpr>(ExitSCEV)) continue; Type *PhiTy = PN.getType(); - if (STy->isIntegerTy() && PhiTy->isPointerTy()) + if (STy->isIntegerTy() && PhiTy->isPointerTy()) { ExitSCEV = SE.getPtrToIntExpr(ExitSCEV, STy); - else if (S->getType() != PN.getType()) + if (isa<SCEVCouldNotCompute>(ExitSCEV)) + continue; + } else if (S->getType() != PN.getType()) { continue; + } // Check if we can re-use the existing PN, by adjusting it with an expanded // offset, if the offset is simpler. @@ -2184,8 +2208,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // negative. If Step is known to be positive or negative, only create // either 1. or 2. auto ComputeEndCheck = [&]() -> Value * { - // Checking <u 0 is always false. - if (!Signed && Start->isZero() && SE.isKnownPositive(Step)) + // Checking <u 0 is always false, if (Step * trunc ExitCount) does not wrap. + // TODO: Predicates that can be proven true/false should be discarded when + // the predicates are created, not late during expansion. + if (!Signed && Start->isZero() && SE.isKnownPositive(Step) && + DstBits < SrcBits && + ExitCount == SE.getZeroExtendExpr(SE.getTruncateExpr(ExitCount, ARTy), + ExitCount->getType()) && + SE.willNotOverflow(Instruction::Mul, Signed, Step, + SE.getTruncateExpr(ExitCount, ARTy))) return ConstantInt::getFalse(Loc->getContext()); // Get the backedge taken count and truncate or extended to the AR type. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7a538ae2c583..970f85378d3d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -612,6 +612,18 @@ private: /// If CompValue is already set, the function is expected to fail if a match /// is found but the value compared to is different. bool matchInstruction(Instruction *I, bool isEQ) { + if (match(I, m_Not(m_Instruction(I)))) + isEQ = !isEQ; + + Value *Val; + if (match(I, m_NUWTrunc(m_Value(Val)))) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(Val)) + return false; + UsedICmps++; + Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ)); + return true; + } // If this is an icmp against a constant, handle this as one of the cases. ICmpInst *ICI; ConstantInt *C; @@ -2260,10 +2272,6 @@ static bool canSinkInstructions( for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { Value *Op = I0->getOperand(OI); - if (Op->getType()->isTokenTy()) - // Don't touch any operand of token type. - return false; - auto SameAsI0 = [&I0, OI](const Instruction *I) { assert(I->getNumOperands() == I0->getNumOperands()); return I->getOperand(OI) == I0->getOperand(OI); @@ -2764,8 +2772,7 @@ bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) { Use &U1 = std::get<1>(Ops); if (U0 == U1) return false; - return U0->getType()->isTokenTy() || - !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()), + return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()), U0.getOperandNo()); }; assert(Invokes.size() == 2 && "Always called with exactly two candidates."); @@ -4404,10 +4411,12 @@ static bool mergeConditionalStoreToAddress( // OK, we're going to sink the stores to PostBB. The store has to be // conditional though, so first create the predicate. - Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); - Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); + BranchInst *PBranch = + cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()); + BranchInst *QBranch = + cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator()); + Value *PCond = PBranch->getCondition(); + Value *QCond = QBranch->getCondition(); Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), PStore->getParent()); @@ -4418,13 +4427,11 @@ static bool mergeConditionalStoreToAddress( IRBuilder<> QB(PostBB, PostBBFirst); QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc()); - Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); - Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); + InvertPCond ^= (PStore->getParent() != PTB); + InvertQCond ^= (QStore->getParent() != QTB); + Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond; + Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond; - if (InvertPCond) - PPred = QB.CreateNot(PPred); - if (InvertQCond) - QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); BasicBlock::iterator InsertPt = QB.GetInsertPoint(); @@ -4808,23 +4815,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, SelectInst *NV = cast<SelectInst>( Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); PN.setIncomingValue(PBBIdx, NV); - // Although the select has the same condition as PBI, the original branch - // weights for PBI do not apply to the new select because the select's - // 'logical' edges are incoming edges of the phi that is eliminated, not - // the outgoing edges of PBI. + // The select has the same condition as PBI, in the same BB. The + // probabilities don't change. if (HasWeights) { - uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; - uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; - uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; - uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; - // The weight to PredCommonDest should be PredCommon * SuccTotal. - // The weight to PredOtherDest should be PredOther * SuccCommon. - uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), - PredOther * SuccCommon}; - - fitWeights(NewWeights); - - setBranchWeights(NV, NewWeights[0], NewWeights[1], + uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight; + setBranchWeights(NV, TrueWeight, FalseWeight, /*IsExpected=*/false); } } @@ -6437,34 +6433,42 @@ static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, namespace { -/// This class represents a lookup table that can be used to replace a switch. -class SwitchLookupTable { +/// This class finds alternatives for switches to ultimately +/// replace the switch. +class SwitchReplacement { public: - /// Create a lookup table to use as a switch replacement with the contents - /// of Values, using DefaultValue to fill any holes in the table. - SwitchLookupTable( + /// Create a helper for optimizations to use as a switch replacement. + /// Find a better representation for the content of Values, + /// using DefaultValue to fill any holes in the table. + SwitchReplacement( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); - /// Build instructions with Builder to retrieve the value at - /// the position given by Index in the lookup table. - Value *buildLookup(Value *Index, IRBuilder<> &Builder, const DataLayout &DL); + /// Build instructions with Builder to retrieve values using Index + /// and replace the switch. + Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL, + Function *Func); /// Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize, Type *ElementType); + /// Return the default value of the switch. + Constant *getDefaultValue(); + + /// Return true if the replacement is a lookup table. + bool isLookupTable(); + private: - // Depending on the contents of the table, it can be represented in - // different ways. + // Depending on the switch, there are different alternatives. enum { - // For tables where each element contains the same value, we just have to + // For switches where each case contains the same value, we just have to // store that single value and return it for each lookup. SingleValueKind, - // For tables where there is a linear relationship between table index + // For switches where there is a linear relationship between table index // and values. We calculate the result with a simple multiplication // and addition instead of a table lookup. LinearMapKind, @@ -6476,9 +6480,15 @@ private: // The table is stored as an array of values. Values are retrieved by load // instructions from the table. - ArrayKind + LookupTableKind } Kind; + // The default value of the switch. + Constant *DefaultValue; + + // The type of the output values. + Type *ValueType; + // For SingleValueKind, this is the single value. Constant *SingleValue = nullptr; @@ -6491,23 +6501,24 @@ private: ConstantInt *LinearMultiplier = nullptr; bool LinearMapValWrapped = false; - // For ArrayKind, this is the array. - GlobalVariable *Array = nullptr; + // For LookupTableKind, this is the table. + Constant *Initializer = nullptr; }; } // end anonymous namespace -SwitchLookupTable::SwitchLookupTable( +SwitchReplacement::SwitchReplacement( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) + : DefaultValue(DefaultValue) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; - Type *ValueType = Values.begin()->second->getType(); + ValueType = Values.begin()->second->getType(); // Build up the table contents. SmallVector<Constant *, 64> TableContents(TableSize); @@ -6597,7 +6608,6 @@ SwitchLookupTable::SwitchLookupTable( (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap); LinearMapValWrapped = NonMonotonic || MayWrap; Kind = LinearMapKind; - ++NumLinearMaps; return; } } @@ -6617,30 +6627,23 @@ SwitchLookupTable::SwitchLookupTable( BitMap = ConstantInt::get(M.getContext(), TableInt); BitMapElementTy = IT; Kind = BitMapKind; - ++NumBitMaps; return; } // Store the table in an array. - ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); - Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - - Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true, - GlobalVariable::PrivateLinkage, Initializer, - "switch.table." + FuncName); - Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - // Set the alignment to that of an array items. We will be only loading one - // value out of it. - Array->setAlignment(DL.getPrefTypeAlign(ValueType)); - Kind = ArrayKind; + auto *TableTy = ArrayType::get(ValueType, TableSize); + Initializer = ConstantArray::get(TableTy, TableContents); + + Kind = LookupTableKind; } -Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder, - const DataLayout &DL) { +Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder, + const DataLayout &DL, Function *Func) { switch (Kind) { case SingleValueKind: return SingleValue; case LinearMapKind: { + ++NumLinearMaps; // Derive the result value from the input value. Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), false, "switch.idx.cast"); @@ -6656,6 +6659,7 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder, return Result; } case BitMapKind: { + ++NumBitMaps; // Type of the bitmap (e.g. i59). IntegerType *MapTy = BitMap->getIntegerType(); @@ -6677,9 +6681,18 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder, // Mask off. return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); } - case ArrayKind: { - Type *IndexTy = DL.getIndexType(Array->getType()); - auto *ArrayTy = cast<ArrayType>(Array->getValueType()); + case LookupTableKind: { + ++NumLookupTables; + auto *Table = + new GlobalVariable(*Func->getParent(), Initializer->getType(), + /*isConstant=*/true, GlobalVariable::PrivateLinkage, + Initializer, "switch.table." + Func->getName()); + Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // Set the alignment to that of an array items. We will be only loading one + // value out of it. + Table->setAlignment(DL.getPrefTypeAlign(ValueType)); + Type *IndexTy = DL.getIndexType(Table->getType()); + auto *ArrayTy = cast<ArrayType>(Table->getValueType()); if (Index->getType() != IndexTy) { unsigned OldBitWidth = Index->getType()->getIntegerBitWidth(); @@ -6691,14 +6704,14 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder, Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index}; Value *GEP = - Builder.CreateInBoundsGEP(ArrayTy, Array, GEPIndices, "switch.gep"); + Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep"); return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load"); } } - llvm_unreachable("Unknown lookup table kind!"); + llvm_unreachable("Unknown helper kind!"); } -bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL, +bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL, uint64_t TableSize, Type *ElementType) { auto *IT = dyn_cast<IntegerType>(ElementType); @@ -6734,6 +6747,10 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, DL.fitsInLegalInteger(IT->getBitWidth()); } +Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; } + +bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; } + static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { // 40% is the default density for building a jump table in optsize/minsize // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this @@ -6760,25 +6777,23 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) { // TODO: We could support larger than legal types by limiting based on the // number of loads required and/or table size. If the constants are small we // could use smaller table entries and extend after the load. -static bool -shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, - const TargetTransformInfo &TTI, const DataLayout &DL, - const SmallDenseMap<PHINode *, Type *> &ResultTypes) { +static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, + const DataLayout &DL, + const SmallVector<Type *> &ResultTypes) { if (SI->getNumCases() > TableSize) return false; // TableSize overflowed. bool AllTablesFitInRegister = true; bool HasIllegalType = false; - for (const auto &I : ResultTypes) { - Type *Ty = I.second; - + for (const auto &Ty : ResultTypes) { // Saturate this flag to true. HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL); // Saturate this flag to false. AllTablesFitInRegister = AllTablesFitInRegister && - SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty); + SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty); // If both flags saturate, we're done. NOTE: This *only* works with // saturating flags, and all flags have to saturate first due to the @@ -6800,7 +6815,7 @@ shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, static bool shouldUseSwitchConditionAsTableIndex( ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, - bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, + bool HasDefaultResults, const SmallVector<Type *> &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI) { if (MinCaseVal.isNullValue()) return true; @@ -6808,10 +6823,9 @@ static bool shouldUseSwitchConditionAsTableIndex( MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() || !HasDefaultResults) return false; - return all_of(ResultTypes, [&](const auto &KV) { - return SwitchLookupTable::wouldFitInRegister( - DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, - KV.second /* ResultType */); + return all_of(ResultTypes, [&](const auto &ResultType) { + return SwitchReplacement::wouldFitInRegister( + DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType); }); } @@ -6900,18 +6914,13 @@ static void reuseTableCompare( /// If the switch is only used to initialize one or more phi nodes in a common /// successor block with different constant values, replace the switch with /// lookup tables. -static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - DomTreeUpdater *DTU, const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); BasicBlock *BB = SI->getParent(); Function *Fn = BB->getParent(); - // Only build lookup table when we have a target that supports it or the - // attribute is not set. - if (!TTI.shouldBuildLookupTables() || - (Fn->getFnAttribute("no-jump-tables").getValueAsBool())) - return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. @@ -6938,7 +6947,7 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, SmallDenseMap<PHINode *, ResultListTy> ResultLists; SmallDenseMap<PHINode *, Constant *> DefaultResults; - SmallDenseMap<PHINode *, Type *> ResultTypes; + SmallVector<Type *> ResultTypes; SmallVector<PHINode *, 4> PHIs; for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { @@ -6955,7 +6964,8 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Results, DL, TTI)) return false; - // Append the result from this case to the list for each phi. + // Append the result and result types from this case to the list for each + // phi. for (const auto &I : Results) { PHINode *PHI = I.first; Constant *Value = I.second; @@ -6963,23 +6973,16 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (Inserted) PHIs.push_back(PHI); It->second.push_back(std::make_pair(CaseVal, Value)); + ResultTypes.push_back(PHI->getType()); } } - // Keep track of the result types. - for (PHINode *PHI : PHIs) { - ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); - } - - uint64_t NumResults = ResultLists[PHIs[0]].size(); - // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList; bool HasDefaultResults = getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL, TTI); - for (const auto &I : DefaultResultsList) { PHINode *PHI = I.first; Constant *Result = I.second; @@ -6989,15 +6992,21 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex( *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI); uint64_t TableSize; - if (UseSwitchConditionAsTableIndex) + ConstantInt *TableIndexOffset; + if (UseSwitchConditionAsTableIndex) { TableSize = MaxCaseVal->getLimitedValue() + 1; - else + TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0); + } else { TableSize = (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1; + TableIndexOffset = MinCaseVal; + } + // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. + uint64_t NumResults = ResultLists[PHIs[0]].size(); bool DefaultIsReachable = !SI->defaultDestUnreachable(); bool TableHasHoles = (NumResults < TableSize); @@ -7025,68 +7034,100 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; - std::vector<DominatorTree::UpdateType> Updates; - - // Compute the maximum table size representable by the integer type we are - // switching upon. - unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); - uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; - assert(MaxTableSize >= TableSize && - "It is impossible for a switch to have more entries than the max " - "representable value of its input integer type's size."); - - // Create the BB that does the lookups. - Module &Mod = *CommonDest->getParent()->getParent(); - BasicBlock *LookupBB = BasicBlock::Create( - Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // Compute the table index value. - Builder.SetInsertPoint(SI); Value *TableIndex; - ConstantInt *TableIndexOffset; if (UseSwitchConditionAsTableIndex) { - TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0); TableIndex = SI->getCondition(); - } else { - TableIndexOffset = MinCaseVal; + if (HasDefaultResults) { + // Grow the table to cover all possible index values to avoid the range + // check. It will use the default result to fill in the table hole later, + // so make sure it exist. + ConstantRange CR = + computeConstantRange(TableIndex, /* ForSigned */ false); + // Grow the table shouldn't have any size impact by checking + // wouldFitInRegister. + // TODO: Consider growing the table also when it doesn't fit in a register + // if no optsize is specified. + const uint64_t UpperBound = CR.getUpper().getLimitedValue(); + if (!CR.isUpperWrapped() && + all_of(ResultTypes, [&](const auto &ResultType) { + return SwitchReplacement::wouldFitInRegister(DL, UpperBound, + ResultType); + })) { + // There may be some case index larger than the UpperBound (unreachable + // case), so make sure the table size does not get smaller. + TableSize = std::max(UpperBound, TableSize); + // The default branch is unreachable after we enlarge the lookup table. + // Adjust DefaultIsReachable to reuse code path. + DefaultIsReachable = false; + } + } + } + + // Keep track of the switch replacement for each phi + SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap; + for (PHINode *PHI : PHIs) { + const auto &ResultList = ResultLists[PHI]; + + Type *ResultType = ResultList.begin()->second->getType(); + // Use any value to fill the lookup table holes. + Constant *DefaultVal = + AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI]; + StringRef FuncName = Fn->getName(); + SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset, + ResultList, DefaultVal, DL, FuncName); + PhiToReplacementMap.insert({PHI, Replacement}); + } + + bool AnyLookupTables = any_of( + PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); }); + + // A few conditions prevent the generation of lookup tables: + // 1. The target does not support lookup tables. + // 2. The "no-jump-tables" function attribute is set. + // However, these objections do not apply to other switch replacements, like + // the bitmap, so we only stop here if any of these conditions are met and we + // want to create a LUT. Otherwise, continue with the switch replacement. + if (AnyLookupTables && + (!TTI.shouldBuildLookupTables() || + Fn->getFnAttribute("no-jump-tables").getValueAsBool())) + return false; + + Builder.SetInsertPoint(SI); + // TableIndex is the switch condition - TableIndexOffset if we don't + // use the condition directly + if (!UseSwitchConditionAsTableIndex) { // If the default is unreachable, all case values are s>= MinCaseVal. Then // we can try to attach nsw. bool MayWrap = true; if (!DefaultIsReachable) { - APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap); + APInt Res = + MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap); (void)Res; } - TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx", /*HasNUW =*/false, /*HasNSW =*/!MayWrap); } - BranchInst *RangeCheckBranch = nullptr; + std::vector<DominatorTree::UpdateType> Updates; - // Grow the table to cover all possible index values to avoid the range check. - // It will use the default result to fill in the table hole later, so make - // sure it exist. - if (UseSwitchConditionAsTableIndex && HasDefaultResults) { - ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false); - // Grow the table shouldn't have any size impact by checking - // wouldFitInRegister. - // TODO: Consider growing the table also when it doesn't fit in a register - // if no optsize is specified. - const uint64_t UpperBound = CR.getUpper().getLimitedValue(); - if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) { - return SwitchLookupTable::wouldFitInRegister( - DL, UpperBound, KV.second /* ResultType */); - })) { - // There may be some case index larger than the UpperBound (unreachable - // case), so make sure the table size does not get smaller. - TableSize = std::max(UpperBound, TableSize); - // The default branch is unreachable after we enlarge the lookup table. - // Adjust DefaultIsReachable to reuse code path. - DefaultIsReachable = false; - } - } + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); + BasicBlock *LookupBB = BasicBlock::Create( + Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); + + BranchInst *RangeCheckBranch = nullptr; + Builder.SetInsertPoint(SI); const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); @@ -7157,25 +7198,16 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, for (PHINode *PHI : PHIs) { const ResultListTy &ResultList = ResultLists[PHI]; - - Type *ResultType = ResultList.begin()->second->getType(); - - // Use any value to fill the lookup table holes. - Constant *DV = - AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI]; - StringRef FuncName = Fn->getName(); - SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV, - DL, FuncName); - - Value *Result = Table.buildLookup(TableIndex, Builder, DL); - + auto Replacement = PhiToReplacementMap.at(PHI); + auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn); // Do a small peephole optimization: re-use the switch table compare if // possible. if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { BasicBlock *PhiBlock = PHI->getParent(); // Search for compare instructions which use the phi. for (auto *User : PHI->users()) { - reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); + reuseTableCompare(User, PhiBlock, RangeCheckBranch, + Replacement.getDefaultValue(), ResultList); } } @@ -7202,7 +7234,6 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (DTU) DTU->applyUpdates(Updates); - ++NumLookupTables; if (NeedMask) ++NumLookupTablesHoles; return true; @@ -7708,7 +7739,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // CVP. Therefore, only apply this transformation during late stages of the // optimisation pipeline. if (Options.ConvertSwitchToLookupTable && - switchToLookupTable(SI, Builder, DTU, DL, TTI)) + simplifySwitchLookup(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2d6a748f4507..8acebbaa5458 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -97,6 +97,10 @@ static cl::opt<unsigned, false, HotColdHintParser> static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue( "hot-new-hint-value", cl::Hidden, cl::init(254), cl::desc("Value to pass to hot/cold operator new for hot allocation")); +static cl::opt<unsigned, false, HotColdHintParser> AmbiguousNewHintValue( + "ambiguous-new-hint-value", cl::Hidden, cl::init(222), + cl::desc( + "Value to pass to hot/cold operator new for ambiguous allocation")); //===----------------------------------------------------------------------===// // Helper Functions @@ -1719,6 +1723,37 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) { return nullptr; } +// Allow existing calls to operator new() that takes a __hot_cold_t parameter to +// be updated with a compiler-determined hot cold hint value. This is used in +// cases where the call is marked nobuiltin (because operator new called +// explicitly) and therefore cannot be replaced with a different callee. +Value *LibCallSimplifier::optimizeExistingHotColdNew(CallInst *CI, + IRBuilderBase &B) { + if (!OptimizeHotColdNew || !OptimizeExistingHotColdNew) + return nullptr; + Function *Callee = CI->getCalledFunction(); + if (!Callee) + return nullptr; + LibFunc Func; + if (!TLI->getLibFunc(*Callee, Func)) + return nullptr; + switch (Func) { + case LibFunc_Znwm12__hot_cold_t: + case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: + case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + case LibFunc_Znam12__hot_cold_t: + case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: + case LibFunc_ZnamSt11align_val_t12__hot_cold_t: + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + case LibFunc_size_returning_new_hot_cold: + case LibFunc_size_returning_new_aligned_hot_cold: + return optimizeNew(CI, B, Func); + default: + return nullptr; + } +} + // When enabled, replace operator new() calls marked with a hot or cold memprof // attribute with an operator new() call that takes a __hot_cold_t parameter. // Currently this is supported by the open source version of tcmalloc, see: @@ -1736,6 +1771,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, HotCold = NotColdNewHintValue; else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot") HotCold = HotNewHintValue; + else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == + "ambiguous") + HotCold = AmbiguousNewHintValue; else return nullptr; @@ -1753,9 +1791,8 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_Znwm12__hot_cold_t, HotCold); break; case LibFunc_Znwm: - if (HotCold != NotColdNewHintValue) - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znwm12__hot_cold_t, HotCold); + return emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znwm12__hot_cold_t, HotCold); break; case LibFunc_Znam12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1763,9 +1800,8 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_Znam12__hot_cold_t, HotCold); break; case LibFunc_Znam: - if (HotCold != NotColdNewHintValue) - return emitHotColdNew(CI->getArgOperand(0), B, TLI, - LibFunc_Znam12__hot_cold_t, HotCold); + return emitHotColdNew(CI->getArgOperand(0), B, TLI, + LibFunc_Znam12__hot_cold_t, HotCold); break; case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1774,10 +1810,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmRKSt9nothrow_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewNoThrow( - CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, - LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold); + return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, + HotCold); break; case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1786,10 +1821,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamRKSt9nothrow_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewNoThrow( - CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, - LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold); + return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, + HotCold); break; case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1798,10 +1832,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnwmSt11align_val_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewAligned( - CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, - LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold); + return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t, + HotCold); break; case LibFunc_ZnamSt11align_val_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1810,10 +1843,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewAligned( - CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, - LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold); + return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B, + TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t, + HotCold); break; case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1823,11 +1855,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, HotCold); break; case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewAlignedNoThrow( - CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, - TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, - HotCold); + return emitHotColdNewAlignedNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, + TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: if (OptimizeExistingHotColdNew) @@ -1837,17 +1867,14 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, HotCold); break; case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: - if (HotCold != NotColdNewHintValue) - return emitHotColdNewAlignedNoThrow( - CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, - TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, - HotCold); + return emitHotColdNewAlignedNoThrow( + CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, + TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold); break; case LibFunc_size_returning_new: - if (HotCold != NotColdNewHintValue) - return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, - LibFunc_size_returning_new_hot_cold, - HotCold); + return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI, + LibFunc_size_returning_new_hot_cold, + HotCold); break; case LibFunc_size_returning_new_hot_cold: if (OptimizeExistingHotColdNew) @@ -1856,10 +1883,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B, HotCold); break; case LibFunc_size_returning_new_aligned: - if (HotCold != NotColdNewHintValue) - return emitHotColdSizeReturningNewAligned( - CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, - LibFunc_size_returning_new_aligned_hot_cold, HotCold); + return emitHotColdSizeReturningNewAligned( + CI->getArgOperand(0), CI->getArgOperand(1), B, TLI, + LibFunc_size_returning_new_aligned_hot_cold, HotCold); break; case LibFunc_size_returning_new_aligned_hot_cold: if (OptimizeExistingHotColdNew) @@ -4094,8 +4120,11 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { // TODO: Split out the code below that operates on FP calls so that // we can all non-FP calls with the StrictFP attribute to be // optimized. - if (CI->isNoBuiltin()) - return nullptr; + if (CI->isNoBuiltin()) { + // If this is an existing call to a hot cold operator new, we can update the + // hint parameter value, which doesn't change the callee. + return optimizeExistingHotColdNew(CI, Builder); + } LibFunc Func; Function *Callee = CI->getCalledFunction(); diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index d52d52a9b7d3..6319fd524ff0 100644 --- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -349,13 +349,7 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, KeyValue = Key->getValue(KeyStorage); if (KeyValue == "source") { - std::string Error; - Source = std::string(Value->getValue(ValueStorage)); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } } else if (KeyValue == "target") { Target = std::string(Value->getValue(ValueStorage)); } else if (KeyValue == "transform") { @@ -379,12 +373,22 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, // TODO see if there is a more elegant solution to selecting the rewrite // descriptor type - if (!Target.empty()) + if (!Target.empty()) { DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>( Source, Target, Naked)); - else - DL->push_back( - std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform)); + return true; + } + + { + std::string Error; + if (!Regex(Source).isValid(Error)) { + YS.printError(Descriptor, "invalid Source regex: " + Error); + return false; + } + } + + DL->push_back( + std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform)); return true; } @@ -418,13 +422,7 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, KeyValue = Key->getValue(KeyStorage); if (KeyValue == "source") { - std::string Error; - Source = std::string(Value->getValue(ValueStorage)); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } } else if (KeyValue == "target") { Target = std::string(Value->getValue(ValueStorage)); } else if (KeyValue == "transform") { @@ -441,13 +439,23 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, return false; } - if (!Target.empty()) + if (!Target.empty()) { DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>( Source, Target, /*Naked*/ false)); - else - DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>( - Source, Transform)); + return true; + } + + { + std::string Error; + if (!Regex(Source).isValid(Error)) { + YS.printError(Descriptor, "invalid Source regex: " + Error); + return false; + } + } + + DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>( + Source, Transform)); return true; } @@ -481,13 +489,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, KeyValue = Key->getValue(KeyStorage); if (KeyValue == "source") { - std::string Error; - Source = std::string(Value->getValue(ValueStorage)); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } } else if (KeyValue == "target") { Target = std::string(Value->getValue(ValueStorage)); } else if (KeyValue == "transform") { @@ -504,13 +506,23 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, return false; } - if (!Target.empty()) + if (!Target.empty()) { DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>( Source, Target, /*Naked*/ false)); - else - DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>( - Source, Transform)); + return true; + } + + { + std::string Error; + if (!Regex(Source).isValid(Error)) { + YS.printError(Descriptor, "invalid Source regex: " + Error); + return false; + } + } + + DL->push_back( + std::make_unique<PatternRewriteNamedAliasDescriptor>(Source, Transform)); return true; } |
