summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--llvm/lib/Transforms/Utils/CodeLayout.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/FunctionImportUtils.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/IRNormalizer.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp20
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp138
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/ProfileVerify.cpp11
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp105
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp39
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp359
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp113
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp72
16 files changed, 576 insertions, 308 deletions
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index c76b3afef50c..27b13eeaf4d7 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -1285,7 +1285,7 @@ private:
// Cache misses on the merged chain
double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
double MergedSize = ChainPred->Size + ChainSucc->Size;
- double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
+ double MergedDensity = MergedCounts / MergedSize;
double NewScore = MergedCounts * missProbability(MergedDensity);
return CurScore - NewScore;
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 7063cde5263b..5a09b7385f2b 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -254,7 +254,6 @@ bool llvm::applyDebugifyMetadata(
}
if (ApplyToMF)
ApplyToMF(DIB, F);
- DIB.finalizeSubprogram(SP);
}
DIB.finalize();
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 3bbe875bbe9e..1a9e16be6989 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -13,6 +13,8 @@
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TimeProfiler.h"
+
using namespace llvm;
/// Uses the "source_filename" instead of a Module hash ID for the suffix of
@@ -370,6 +372,7 @@ void FunctionImportGlobalProcessing::run() { processGlobalsForThinLTO(); }
void llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
bool ClearDSOLocalOnDeclarations,
SetVector<GlobalValue *> *GlobalsToImport) {
+ llvm::TimeTraceScope timeScope("Rename module for ThinLTO");
FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport,
ClearDSOLocalOnDeclarations);
ThinLTOProcessing.run();
diff --git a/llvm/lib/Transforms/Utils/IRNormalizer.cpp b/llvm/lib/Transforms/Utils/IRNormalizer.cpp
index ad91318ae474..fefa49f68c8d 100644
--- a/llvm/lib/Transforms/Utils/IRNormalizer.cpp
+++ b/llvm/lib/Transforms/Utils/IRNormalizer.cpp
@@ -427,7 +427,7 @@ void IRNormalizer::reorderInstructions(Function &F) const {
// Process the remaining instructions.
//
// TODO: Do more a intelligent sorting of these instructions. For example,
- // seperate between dead instructinos and instructions used in another
+ // separate between dead instructinos and instructions used in another
// block. Use properties of the CFG the order instructions that are used
// in another block.
if (Visited.contains(&I))
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index ac344904f90f..2cfd70a1746c 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3397,8 +3397,8 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
if (FP && Ty.isFloatingPointTy() && Ty.getScalarSizeInBits() <= 64) {
const APFloat &APF = FP->getValueAPF();
APInt const &API = APF.bitcastToAPInt();
- if (auto Temp = API.getZExtValue())
- return DIB.createConstantValueExpression(static_cast<uint64_t>(Temp));
+ if (uint64_t Temp = API.getZExtValue())
+ return DIB.createConstantValueExpression(Temp);
return DIB.createConstantValueExpression(*API.getRawData());
}
@@ -3838,8 +3838,8 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
const auto *Op = I->getOperand(OpIdx);
- // We can't have a PHI with a metadata type.
- if (Op->getType()->isMetadataTy())
+ // We can't have a PHI with a metadata or token type.
+ if (Op->getType()->isMetadataTy() || Op->getType()->isTokenLikeTy())
return false;
// swifterror pointers can only be used by a load, store, or as a swifterror
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index ba0ac01cadd8..735bad1cb134 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -225,9 +225,9 @@ protected:
// Auxiliary function to calculate the number of iterations for a comparison
// instruction or a binary operator.
- PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp,
- const PeelCounterValue &LHS,
- const PeelCounterValue &RHS) const;
+ PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
+ const PeelCounterValue &LHS,
+ const PeelCounterValue &RHS) const;
// Returns true if the \p Phi is an induction in the target loop. This is a
// lightweight check and possible to detect an IV in some cases.
@@ -269,15 +269,13 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
break;
// Avoid infinite loop.
- if (Visited.contains(Cur))
+ if (!Visited.insert(Cur).second)
return false;
auto *I = dyn_cast<Instruction>(Cur);
if (!I || !L.contains(I))
return false;
- Visited.insert(Cur);
-
if (auto *Cast = dyn_cast<CastInst>(I)) {
Cur = Cast->getOperand(0);
} else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
@@ -300,14 +298,14 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
/// considered an IV only if it is an addition or a subtraction. Otherwise the
-/// result can be a value that is neither an loop-invariant nor an IV.
+/// result can be a value that is neither a loop-invariant nor an IV.
///
/// If both \p LHS and \p RHS are loop-invariants, then the result of
/// \CmpOrBinaryOp is also a loop-invariant.
PhiAnalyzer::PeelCounter
-PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp,
- const PeelCounterValue &LHS,
- const PeelCounterValue &RHS) const {
+PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
+ const PeelCounterValue &LHS,
+ const PeelCounterValue &RHS) const {
auto &[LVal, LTy] = LHS;
auto &[RVal, RTy] = RHS;
unsigned NewVal = std::max(LVal, RVal);
@@ -380,7 +378,7 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
if (RHS == Unknown)
return Unknown;
return (IterationsToInvarianceOrInduction[I] =
- mergeTwoCounter(*I, *LHS, *RHS));
+ mergeTwoCounters(*I, *LHS, *RHS));
}
if (I->isCast())
// Cast instructions get the value of the operand.
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 86b268de43cf..b18aceaa67d7 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -108,6 +109,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
#endif
);
+static cl::opt<bool> UnrollAddParallelReductions(
+ "unroll-add-parallel-reductions", cl::init(false), cl::Hidden,
+ cl::desc("Allow unrolling to add parallel reduction phis."));
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
@@ -660,6 +664,41 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
OrigPHINode.push_back(cast<PHINode>(I));
}
+ // Collect phi nodes for reductions for which we can introduce multiple
+ // parallel reduction phis and compute the final reduction result after the
+ // loop. This requires a single exit block after unrolling. This is ensured by
+ // restricting to single-block loops where the unrolled iterations are known
+ // to not exit.
+ DenseMap<PHINode *, RecurrenceDescriptor> Reductions;
+ bool CanAddAdditionalAccumulators =
+ (UnrollAddParallelReductions.getNumOccurrences() > 0
+ ? UnrollAddParallelReductions
+ : ULO.AddAdditionalAccumulators) &&
+ !CompletelyUnroll && L->getNumBlocks() == 1 &&
+ (ULO.Runtime ||
+ (ExitInfos.contains(Header) && ((ExitInfos[Header].TripCount != 0 &&
+ ExitInfos[Header].BreakoutTrip == 0))));
+
+ // Limit parallelizing reductions to unroll counts of 4 or less for now.
+ // TODO: The number of parallel reductions should depend on the number of
+ // execution units. We also don't have to add a parallel reduction phi per
+ // unrolled iteration, but could for example add a parallel phi for every 2
+ // unrolled iterations.
+ if (CanAddAdditionalAccumulators && ULO.Count <= 4) {
+ for (PHINode &Phi : Header->phis()) {
+ auto RdxDesc = canParallelizeReductionWhenUnrolling(Phi, L, SE);
+ if (!RdxDesc)
+ continue;
+
+ // Only handle duplicate phis for a single reduction for now.
+ // TODO: Handle any number of reductions
+ if (!Reductions.empty())
+ continue;
+
+ Reductions[&Phi] = *RdxDesc;
+ }
+ }
+
std::vector<BasicBlock *> Headers;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
@@ -710,6 +749,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// latch. This is a reasonable default placement if we don't have block
// frequencies, and if we do, well the layout will be adjusted later.
auto BlockInsertPt = std::next(LatchBlock->getIterator());
+ SmallVector<Instruction *> PartialReductions;
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -733,6 +773,31 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (PHINode *OrigPHI : OrigPHINode) {
PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+
+ // Use cloned phis as parallel phis for partial reductions, which will
+ // get combined to the final reduction result after the loop.
+ if (Reductions.contains(OrigPHI)) {
+ // Collect partial reduction results.
+ if (PartialReductions.empty())
+ PartialReductions.push_back(cast<Instruction>(InVal));
+ PartialReductions.push_back(cast<Instruction>(VMap[InVal]));
+
+ // Update the start value for the cloned phis to use the identity
+ // value for the reduction.
+ const RecurrenceDescriptor &RdxDesc = Reductions[OrigPHI];
+ NewPHI->setIncomingValueForBlock(
+ L->getLoopPreheader(),
+ getRecurrenceIdentity(RdxDesc.getRecurrenceKind(),
+ OrigPHI->getType(),
+ RdxDesc.getFastMathFlags()));
+
+ // Update NewPHI to use the cloned value for the iteration and move
+ // to header.
+ NewPHI->replaceUsesOfWith(InVal, VMap[InVal]);
+ NewPHI->moveBefore(OrigPHI->getIterator());
+ continue;
+ }
+
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
if (It > 1 && L->contains(InValI))
InVal = LastValueMap[InValI];
@@ -832,6 +897,9 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
PN->eraseFromParent();
} else if (ULO.Count > 1) {
+ if (Reductions.contains(PN))
+ continue;
+
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
// If this value was defined in the loop, take the value defined by the
// last iteration of the loop.
@@ -1010,6 +1078,38 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
+ // If there are partial reductions, create code in the exit block to compute
+ // the final result and update users of the final result.
+ if (!PartialReductions.empty()) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ assert(ExitBlock &&
+ "Can only introduce parallel reduction phis with single exit block");
+ assert(Reductions.size() == 1 &&
+ "currently only a single reduction is supported");
+ Value *FinalRdxValue = PartialReductions.back();
+ Value *RdxResult = nullptr;
+ for (PHINode &Phi : ExitBlock->phis()) {
+ if (Phi.getIncomingValueForBlock(L->getLoopLatch()) != FinalRdxValue)
+ continue;
+ if (!RdxResult) {
+ RdxResult = PartialReductions.front();
+ IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt());
+ RecurKind RK = Reductions.begin()->second.getRecurrenceKind();
+ for (Instruction *RdxPart : drop_begin(PartialReductions)) {
+ RdxResult = Builder.CreateBinOp(
+ (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK),
+ RdxPart, RdxResult, "bin.rdx");
+ }
+ NeedToFixLCSSA = true;
+ for (Instruction *RdxPart : PartialReductions)
+ RdxPart->dropPoisonGeneratingFlags();
+ }
+
+ Phi.replaceAllUsesWith(RdxResult);
+ continue;
+ }
+ }
+
if (DTUToUse) {
// Apply updates to the DomTree.
DT = &DTU.getDomTree();
@@ -1111,3 +1211,41 @@ MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
}
return nullptr;
}
+
+std::optional<RecurrenceDescriptor>
+llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L,
+ ScalarEvolution *SE) {
+ RecurrenceDescriptor RdxDesc;
+ if (!RecurrenceDescriptor::isReductionPHI(&Phi, L, RdxDesc,
+ /*DemandedBits=*/nullptr,
+ /*AC=*/nullptr, /*DT=*/nullptr, SE))
+ return std::nullopt;
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ // Skip unsupported reductions.
+ // TODO: Handle additional reductions, including FP and min-max
+ // reductions.
+ if (!RecurrenceDescriptor::isIntegerRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isFindIVRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
+ return std::nullopt;
+
+ if (RdxDesc.IntermediateStore)
+ return std::nullopt;
+
+ // Don't unroll reductions with constant ops; those can be folded to a
+ // single induction update.
+ if (any_of(cast<Instruction>(Phi.getIncomingValueForBlock(L->getLoopLatch()))
+ ->operands(),
+ IsaPred<Constant>))
+ return std::nullopt;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch ||
+ !is_contained(
+ cast<Instruction>(Phi.getIncomingValueForBlock(Latch))->operands(),
+ &Phi))
+ return std::nullopt;
+
+ return RdxDesc;
+}
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 843364eb34f8..b172ef6ba080 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -2032,6 +2032,7 @@ Value *llvm::addRuntimeChecks(
MemoryRuntimeCheck = IsConflict;
}
+ Exp.eraseDeadInstructions(MemoryRuntimeCheck);
return MemoryRuntimeCheck;
}
@@ -2077,6 +2078,7 @@ Value *llvm::addDiffRuntimeChecks(
MemoryRuntimeCheck = IsConflict;
}
+ Expander.eraseDeadInstructions(MemoryRuntimeCheck);
return MemoryRuntimeCheck;
}
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 1711163fb9f5..ec2e6c1ab796 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -81,6 +81,8 @@ void LoopVersioning::versionLoop(
} else
RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+ Exp.eraseDeadInstructions(SCEVRuntimeCheck);
+
assert(RuntimeCheck && "called even though we don't need "
"any runtime checks");
diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
index 41647f7717a4..faacd422c009 100644
--- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp
+++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
@@ -155,12 +155,15 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F,
FunctionAnalysisManager &FAM) {
const auto EntryCount = F.getEntryCount(/*AllowSynthetic=*/true);
if (!EntryCount) {
- F.getContext().emitError("Profile verification failed: function entry "
- "count missing (set to 0 if cold)");
+ auto *MD = F.getMetadata(LLVMContext::MD_prof);
+ if (!MD || !isExplicitlyUnknownProfileMetadata(*MD)) {
+ F.getContext().emitError("Profile verification failed: function entry "
+ "count missing (set to 0 if cold)");
+ return PreservedAnalyses::all();
+ }
+ } else if (EntryCount->getCount() == 0) {
return PreservedAnalyses::all();
}
- if (EntryCount->getCount() == 0)
- return PreservedAnalyses::all();
for (const auto &BB : F) {
if (AnnotateSelect) {
for (const auto &I : BB)
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 10c162bc6463..d93a4d87f30f 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -849,9 +849,12 @@ void PromoteMem2Reg::run() {
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType()));
- // When handling debug info, treat all incoming values as if they have unknown
- // locations until proven otherwise.
+ // When handling debug info, treat all incoming values as if they have
+ // compiler-generated (empty) locations, representing the uninitialized
+ // alloca, until proven otherwise.
IncomingLocs.resize(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ IncomingLocs.init(i, DebugLoc::getCompilerGenerated());
// The renamer uses the Visited set to avoid infinite loops.
Visited.resize(F.getMaxBlockNumber(), false);
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index d53a3144bf57..a814867652cd 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -21,29 +21,20 @@
using namespace llvm;
-static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
+struct LookupTableInfo {
+ Value *Index;
+ SmallVector<Constant *> Ptrs;
+};
+
+static bool shouldConvertToRelLookupTable(LookupTableInfo &Info, Module &M,
+ GlobalVariable &GV) {
// If lookup table has more than one user,
// do not generate a relative lookup table.
// This is to simplify the analysis that needs to be done for this pass.
// TODO: Add support for lookup tables with multiple uses.
// For ex, this can happen when a function that uses a lookup table gets
// inlined into multiple call sites.
- if (!GV.hasInitializer() ||
- !GV.isConstant() ||
- !GV.hasOneUse())
- return false;
-
- GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
- if (!GEP || !GEP->hasOneUse() ||
- GV.getValueType() != GEP->getSourceElementType())
- return false;
-
- LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
- if (!Load || !Load->hasOneUse() ||
- Load->getType() != GEP->getResultElementType())
- return false;
-
+ //
// If the original lookup table does not have local linkage and is
// not dso_local, do not generate a relative lookup table.
// This optimization creates a relative lookup table that consists of
@@ -51,21 +42,40 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
// To be able to generate these offsets, relative lookup table and
// its elements should have internal linkage and be dso_local, which means
// that they should resolve to symbols within the same linkage unit.
- if (!GV.hasLocalLinkage() ||
- !GV.isDSOLocal() ||
- !GV.isImplicitDSOLocal())
+ if (!GV.hasInitializer() || !GV.isConstant() || !GV.hasOneUse() ||
+ !GV.hasLocalLinkage() || !GV.isDSOLocal() || !GV.isImplicitDSOLocal())
return false;
- ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer());
- if (!Array)
+ auto *GEP = dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
+ if (!GEP || !GEP->hasOneUse())
+ return false;
+
+ auto *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
+ if (!Load || !Load->hasOneUse())
return false;
// If values are not 64-bit pointers, do not generate a relative lookup table.
const DataLayout &DL = M.getDataLayout();
- Type *ElemType = Array->getType()->getElementType();
+ Type *ElemType = Load->getType();
if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64)
return false;
+ // Make sure this is a gep of the form GV + scale*var.
+ unsigned IndexWidth =
+ DL.getIndexTypeSizeInBits(Load->getPointerOperand()->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(IndexWidth, 0);
+ if (!GEP->collectOffset(DL, IndexWidth, VarOffsets, ConstOffset) ||
+ !ConstOffset.isZero() || VarOffsets.size() != 1)
+ return false;
+
+ // This can't be a pointer lookup table if the stride is smaller than a
+ // pointer.
+ Info.Index = VarOffsets.front().first;
+ const APInt &Stride = VarOffsets.front().second;
+ if (Stride.ult(DL.getTypeStoreSize(ElemType)))
+ return false;
+
SmallVector<GlobalVariable *, 4> GVOps;
Triple TT = M.getTargetTriple();
// FIXME: This should be removed in the future.
@@ -80,14 +90,20 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
// https://github.com/rust-lang/rust/issues/141306.
|| (TT.isX86() && TT.isOSDarwin());
- for (const Use &Op : Array->operands()) {
- Constant *ConstOp = cast<Constant>(&Op);
+ APInt Offset(IndexWidth, 0);
+ uint64_t GVSize = DL.getTypeAllocSize(GV.getValueType());
+ for (; Offset.ult(GVSize); Offset += Stride) {
+ Constant *C =
+ ConstantFoldLoadFromConst(GV.getInitializer(), ElemType, Offset, DL);
+ if (!C)
+ return false;
+
GlobalValue *GVOp;
- APInt Offset;
+ APInt GVOffset;
// If an operand is not a constant offset from a lookup table,
// do not generate a relative lookup table.
- if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL))
+ if (!IsConstantOffsetFromGlobal(C, GVOp, GVOffset, DL))
return false;
// If operand is mutable, do not generate a relative lookup table.
@@ -102,6 +118,8 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
if (ShouldDropUnnamedAddr)
GVOps.push_back(GlovalVarOp);
+
+ Info.Ptrs.push_back(C);
}
if (ShouldDropUnnamedAddr)
@@ -111,14 +129,12 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
return true;
}
-static GlobalVariable *createRelLookupTable(Function &Func,
+static GlobalVariable *createRelLookupTable(LookupTableInfo &Info,
+ Function &Func,
GlobalVariable &LookupTable) {
Module &M = *Func.getParent();
- ConstantArray *LookupTableArr =
- cast<ConstantArray>(LookupTable.getInitializer());
- unsigned NumElts = LookupTableArr->getType()->getNumElements();
ArrayType *IntArrayTy =
- ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts);
+ ArrayType::get(Type::getInt32Ty(M.getContext()), Info.Ptrs.size());
GlobalVariable *RelLookupTable = new GlobalVariable(
M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(),
@@ -127,10 +143,9 @@ static GlobalVariable *createRelLookupTable(Function &Func,
LookupTable.isExternallyInitialized());
uint64_t Idx = 0;
- SmallVector<Constant *, 64> RelLookupTableContents(NumElts);
+ SmallVector<Constant *, 64> RelLookupTableContents(Info.Ptrs.size());
- for (Use &Operand : LookupTableArr->operands()) {
- Constant *Element = cast<Constant>(Operand);
+ for (Constant *Element : Info.Ptrs) {
Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy);
Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy);
@@ -148,7 +163,8 @@ static GlobalVariable *createRelLookupTable(Function &Func,
return RelLookupTable;
}
-static void convertToRelLookupTable(GlobalVariable &LookupTable) {
+static void convertToRelLookupTable(LookupTableInfo &Info,
+ GlobalVariable &LookupTable) {
GetElementPtrInst *GEP =
cast<GetElementPtrInst>(LookupTable.use_begin()->getUser());
LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser());
@@ -159,21 +175,21 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
Function &Func = *BB->getParent();
// Generate an array that consists of relative offsets.
- GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable);
+ GlobalVariable *RelLookupTable =
+ createRelLookupTable(Info, Func, LookupTable);
// Place new instruction sequence before GEP.
Builder.SetInsertPoint(GEP);
- Value *Index = GEP->getOperand(2);
- IntegerType *IntTy = cast<IntegerType>(Index->getType());
- Value *Offset =
- Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
+ IntegerType *IntTy = cast<IntegerType>(Info.Index->getType());
+ Value *Offset = Builder.CreateShl(Info.Index, ConstantInt::get(IntTy, 2),
+ "reltable.shift");
// Insert the call to load.relative intrinsic before LOAD.
// GEP might not be immediately followed by a LOAD, like it can be hoisted
// outside the loop or another instruction might be inserted them in between.
Builder.SetInsertPoint(Load);
Function *LoadRelIntrinsic = llvm::Intrinsic::getOrInsertDeclaration(
- &M, Intrinsic::load_relative, {Index->getType()});
+ &M, Intrinsic::load_relative, {Info.Index->getType()});
// Create a call to load.relative intrinsic that computes the target address
// by adding base address (lookup table address) and relative offset.
@@ -205,10 +221,11 @@ static bool convertToRelativeLookupTables(
bool Changed = false;
for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
- if (!shouldConvertToRelLookupTable(M, GV))
+ LookupTableInfo Info;
+ if (!shouldConvertToRelLookupTable(Info, M, GV))
continue;
- convertToRelLookupTable(GV);
+ convertToRelLookupTable(Info, GV);
// Remove the original lookup table.
GV.eraseFromParent();
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 060ca92e559a..28befd0aa1ce 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
@@ -175,6 +176,26 @@ SCEVExpander::findInsertPointAfter(Instruction *I,
return IP;
}
+void SCEVExpander::eraseDeadInstructions(Value *Root) {
+ SmallVector<Value *> WorkList;
+ SmallPtrSet<Value *, 8> DeletedValues;
+ append_range(WorkList, getAllInsertedInstructions());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.pop_back_val();
+ if (DeletedValues.contains(V))
+ continue;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || I == Root || !isInsertedInstruction(I) ||
+ !isInstructionTriviallyDead(I))
+ continue;
+ append_range(WorkList, I->operands());
+ InsertedValues.erase(I);
+ InsertedPostIncValues.erase(I);
+ DeletedValues.insert(I);
+ I->eraseFromParent();
+ }
+}
+
BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
// Cast the argument at the beginning of the entry block, after
@@ -1239,10 +1260,13 @@ Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
if (!isa<SCEVAddRecExpr>(ExitSCEV))
continue;
Type *PhiTy = PN.getType();
- if (STy->isIntegerTy() && PhiTy->isPointerTy())
+ if (STy->isIntegerTy() && PhiTy->isPointerTy()) {
ExitSCEV = SE.getPtrToIntExpr(ExitSCEV, STy);
- else if (S->getType() != PN.getType())
+ if (isa<SCEVCouldNotCompute>(ExitSCEV))
+ continue;
+ } else if (S->getType() != PN.getType()) {
continue;
+ }
// Check if we can re-use the existing PN, by adjusting it with an expanded
// offset, if the offset is simpler.
@@ -2184,8 +2208,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// negative. If Step is known to be positive or negative, only create
// either 1. or 2.
auto ComputeEndCheck = [&]() -> Value * {
- // Checking <u 0 is always false.
- if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
+ // Checking <u 0 is always false, if (Step * trunc ExitCount) does not wrap.
+ // TODO: Predicates that can be proven true/false should be discarded when
+ // the predicates are created, not late during expansion.
+ if (!Signed && Start->isZero() && SE.isKnownPositive(Step) &&
+ DstBits < SrcBits &&
+ ExitCount == SE.getZeroExtendExpr(SE.getTruncateExpr(ExitCount, ARTy),
+ ExitCount->getType()) &&
+ SE.willNotOverflow(Instruction::Mul, Signed, Step,
+ SE.getTruncateExpr(ExitCount, ARTy)))
return ConstantInt::getFalse(Loc->getContext());
// Get the backedge taken count and truncate or extended to the AR type.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7a538ae2c583..970f85378d3d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -612,6 +612,18 @@ private:
/// If CompValue is already set, the function is expected to fail if a match
/// is found but the value compared to is different.
bool matchInstruction(Instruction *I, bool isEQ) {
+ if (match(I, m_Not(m_Instruction(I))))
+ isEQ = !isEQ;
+
+ Value *Val;
+ if (match(I, m_NUWTrunc(m_Value(Val)))) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(Val))
+ return false;
+ UsedICmps++;
+ Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
+ return true;
+ }
// If this is an icmp against a constant, handle this as one of the cases.
ICmpInst *ICI;
ConstantInt *C;
@@ -2260,10 +2272,6 @@ static bool canSinkInstructions(
for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
Value *Op = I0->getOperand(OI);
- if (Op->getType()->isTokenTy())
- // Don't touch any operand of token type.
- return false;
-
auto SameAsI0 = [&I0, OI](const Instruction *I) {
assert(I->getNumOperands() == I0->getNumOperands());
return I->getOperand(OI) == I0->getOperand(OI);
@@ -2764,8 +2772,7 @@ bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
Use &U1 = std::get<1>(Ops);
if (U0 == U1)
return false;
- return U0->getType()->isTokenTy() ||
- !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
+ return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
U0.getOperandNo());
};
assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
@@ -4404,10 +4411,12 @@ static bool mergeConditionalStoreToAddress(
// OK, we're going to sink the stores to PostBB. The store has to be
// conditional though, so first create the predicate.
- Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
- ->getCondition();
- Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
- ->getCondition();
+ BranchInst *PBranch =
+ cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator());
+ BranchInst *QBranch =
+ cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator());
+ Value *PCond = PBranch->getCondition();
+ Value *QCond = QBranch->getCondition();
Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
PStore->getParent());
@@ -4418,13 +4427,11 @@ static bool mergeConditionalStoreToAddress(
IRBuilder<> QB(PostBB, PostBBFirst);
QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
- Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
- Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+ InvertPCond ^= (PStore->getParent() != PTB);
+ InvertQCond ^= (QStore->getParent() != QTB);
+ Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
+ Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
- if (InvertPCond)
- PPred = QB.CreateNot(PPred);
- if (InvertQCond)
- QPred = QB.CreateNot(QPred);
Value *CombinedPred = QB.CreateOr(PPred, QPred);
BasicBlock::iterator InsertPt = QB.GetInsertPoint();
@@ -4808,23 +4815,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
SelectInst *NV = cast<SelectInst>(
Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
PN.setIncomingValue(PBBIdx, NV);
- // Although the select has the same condition as PBI, the original branch
- // weights for PBI do not apply to the new select because the select's
- // 'logical' edges are incoming edges of the phi that is eliminated, not
- // the outgoing edges of PBI.
+ // The select has the same condition as PBI, in the same BB. The
+ // probabilities don't change.
if (HasWeights) {
- uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
- uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
- uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
- uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
- // The weight to PredCommonDest should be PredCommon * SuccTotal.
- // The weight to PredOtherDest should be PredOther * SuccCommon.
- uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
- PredOther * SuccCommon};
-
- fitWeights(NewWeights);
-
- setBranchWeights(NV, NewWeights[0], NewWeights[1],
+ uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
+ setBranchWeights(NV, TrueWeight, FalseWeight,
/*IsExpected=*/false);
}
}
@@ -6437,34 +6433,42 @@ static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
namespace {
-/// This class represents a lookup table that can be used to replace a switch.
-class SwitchLookupTable {
+/// This class finds alternatives for switches to ultimately
+/// replace the switch.
+class SwitchReplacement {
public:
- /// Create a lookup table to use as a switch replacement with the contents
- /// of Values, using DefaultValue to fill any holes in the table.
- SwitchLookupTable(
+ /// Create a helper for optimizations to use as a switch replacement.
+ /// Find a better representation for the content of Values,
+ /// using DefaultValue to fill any holes in the table.
+ SwitchReplacement(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
- /// Build instructions with Builder to retrieve the value at
- /// the position given by Index in the lookup table.
- Value *buildLookup(Value *Index, IRBuilder<> &Builder, const DataLayout &DL);
+ /// Build instructions with Builder to retrieve values using Index
+ /// and replace the switch.
+ Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
+ Function *Func);
/// Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
Type *ElementType);
+ /// Return the default value of the switch.
+ Constant *getDefaultValue();
+
+ /// Return true if the replacement is a lookup table.
+ bool isLookupTable();
+
private:
- // Depending on the contents of the table, it can be represented in
- // different ways.
+ // Depending on the switch, there are different alternatives.
enum {
- // For tables where each element contains the same value, we just have to
+ // For switches where each case contains the same value, we just have to
// store that single value and return it for each lookup.
SingleValueKind,
- // For tables where there is a linear relationship between table index
+ // For switches where there is a linear relationship between table index
// and values. We calculate the result with a simple multiplication
// and addition instead of a table lookup.
LinearMapKind,
@@ -6476,9 +6480,15 @@ private:
// The table is stored as an array of values. Values are retrieved by load
// instructions from the table.
- ArrayKind
+ LookupTableKind
} Kind;
+ // The default value of the switch.
+ Constant *DefaultValue;
+
+ // The type of the output values.
+ Type *ValueType;
+
// For SingleValueKind, this is the single value.
Constant *SingleValue = nullptr;
@@ -6491,23 +6501,24 @@ private:
ConstantInt *LinearMultiplier = nullptr;
bool LinearMapValWrapped = false;
- // For ArrayKind, this is the array.
- GlobalVariable *Array = nullptr;
+ // For LookupTableKind, this is the table.
+ Constant *Initializer = nullptr;
};
} // end anonymous namespace
-SwitchLookupTable::SwitchLookupTable(
+SwitchReplacement::SwitchReplacement(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
+ : DefaultValue(DefaultValue) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
// If all values in the table are equal, this is that value.
SingleValue = Values.begin()->second;
- Type *ValueType = Values.begin()->second->getType();
+ ValueType = Values.begin()->second->getType();
// Build up the table contents.
SmallVector<Constant *, 64> TableContents(TableSize);
@@ -6597,7 +6608,6 @@ SwitchLookupTable::SwitchLookupTable(
(void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
LinearMapValWrapped = NonMonotonic || MayWrap;
Kind = LinearMapKind;
- ++NumLinearMaps;
return;
}
}
@@ -6617,30 +6627,23 @@ SwitchLookupTable::SwitchLookupTable(
BitMap = ConstantInt::get(M.getContext(), TableInt);
BitMapElementTy = IT;
Kind = BitMapKind;
- ++NumBitMaps;
return;
}
// Store the table in an array.
- ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
- Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
-
- Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
- GlobalVariable::PrivateLinkage, Initializer,
- "switch.table." + FuncName);
- Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- // Set the alignment to that of an array items. We will be only loading one
- // value out of it.
- Array->setAlignment(DL.getPrefTypeAlign(ValueType));
- Kind = ArrayKind;
+ auto *TableTy = ArrayType::get(ValueType, TableSize);
+ Initializer = ConstantArray::get(TableTy, TableContents);
+
+ Kind = LookupTableKind;
}
-Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
- const DataLayout &DL) {
+Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
+ const DataLayout &DL, Function *Func) {
switch (Kind) {
case SingleValueKind:
return SingleValue;
case LinearMapKind: {
+ ++NumLinearMaps;
// Derive the result value from the input value.
Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
false, "switch.idx.cast");
@@ -6656,6 +6659,7 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
return Result;
}
case BitMapKind: {
+ ++NumBitMaps;
// Type of the bitmap (e.g. i59).
IntegerType *MapTy = BitMap->getIntegerType();
@@ -6677,9 +6681,18 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
// Mask off.
return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
}
- case ArrayKind: {
- Type *IndexTy = DL.getIndexType(Array->getType());
- auto *ArrayTy = cast<ArrayType>(Array->getValueType());
+ case LookupTableKind: {
+ ++NumLookupTables;
+ auto *Table =
+ new GlobalVariable(*Func->getParent(), Initializer->getType(),
+ /*isConstant=*/true, GlobalVariable::PrivateLinkage,
+ Initializer, "switch.table." + Func->getName());
+ Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ // Set the alignment to that of an array items. We will be only loading one
+ // value out of it.
+ Table->setAlignment(DL.getPrefTypeAlign(ValueType));
+ Type *IndexTy = DL.getIndexType(Table->getType());
+ auto *ArrayTy = cast<ArrayType>(Table->getValueType());
if (Index->getType() != IndexTy) {
unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
@@ -6691,14 +6704,14 @@ Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
Value *GEP =
- Builder.CreateInBoundsGEP(ArrayTy, Array, GEPIndices, "switch.gep");
+ Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
}
}
- llvm_unreachable("Unknown lookup table kind!");
+ llvm_unreachable("Unknown helper kind!");
}
-bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
+bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
Type *ElementType) {
auto *IT = dyn_cast<IntegerType>(ElementType);
@@ -6734,6 +6747,10 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
DL.fitsInLegalInteger(IT->getBitWidth());
}
+Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
+
+bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
+
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
// 40% is the default density for building a jump table in optsize/minsize
// mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
@@ -6760,25 +6777,23 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) {
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
-static bool
-shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
- const TargetTransformInfo &TTI, const DataLayout &DL,
- const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
+static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL,
+ const SmallVector<Type *> &ResultTypes) {
if (SI->getNumCases() > TableSize)
return false; // TableSize overflowed.
bool AllTablesFitInRegister = true;
bool HasIllegalType = false;
- for (const auto &I : ResultTypes) {
- Type *Ty = I.second;
-
+ for (const auto &Ty : ResultTypes) {
// Saturate this flag to true.
HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
// Saturate this flag to false.
AllTablesFitInRegister =
AllTablesFitInRegister &&
- SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
+ SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
// If both flags saturate, we're done. NOTE: This *only* works with
// saturating flags, and all flags have to saturate first due to the
@@ -6800,7 +6815,7 @@ shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
static bool shouldUseSwitchConditionAsTableIndex(
ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
- bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
+ bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
const DataLayout &DL, const TargetTransformInfo &TTI) {
if (MinCaseVal.isNullValue())
return true;
@@ -6808,10 +6823,9 @@ static bool shouldUseSwitchConditionAsTableIndex(
MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
!HasDefaultResults)
return false;
- return all_of(ResultTypes, [&](const auto &KV) {
- return SwitchLookupTable::wouldFitInRegister(
- DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
- KV.second /* ResultType */);
+ return all_of(ResultTypes, [&](const auto &ResultType) {
+ return SwitchReplacement::wouldFitInRegister(
+ DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
});
}
@@ -6900,18 +6914,13 @@ static void reuseTableCompare(
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
-static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
- DomTreeUpdater *DTU, const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
BasicBlock *BB = SI->getParent();
Function *Fn = BB->getParent();
- // Only build lookup table when we have a target that supports it or the
- // attribute is not set.
- if (!TTI.shouldBuildLookupTables() ||
- (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
- return false;
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
@@ -6938,7 +6947,7 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallDenseMap<PHINode *, ResultListTy> ResultLists;
SmallDenseMap<PHINode *, Constant *> DefaultResults;
- SmallDenseMap<PHINode *, Type *> ResultTypes;
+ SmallVector<Type *> ResultTypes;
SmallVector<PHINode *, 4> PHIs;
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
@@ -6955,7 +6964,8 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Results, DL, TTI))
return false;
- // Append the result from this case to the list for each phi.
+ // Append the result and result types from this case to the list for each
+ // phi.
for (const auto &I : Results) {
PHINode *PHI = I.first;
Constant *Value = I.second;
@@ -6963,23 +6973,16 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (Inserted)
PHIs.push_back(PHI);
It->second.push_back(std::make_pair(CaseVal, Value));
+ ResultTypes.push_back(PHI->getType());
}
}
- // Keep track of the result types.
- for (PHINode *PHI : PHIs) {
- ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
- }
-
- uint64_t NumResults = ResultLists[PHIs[0]].size();
-
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
bool HasDefaultResults =
getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
DefaultResultsList, DL, TTI);
-
for (const auto &I : DefaultResultsList) {
PHINode *PHI = I.first;
Constant *Result = I.second;
@@ -6989,15 +6992,21 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
*MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
uint64_t TableSize;
- if (UseSwitchConditionAsTableIndex)
+ ConstantInt *TableIndexOffset;
+ if (UseSwitchConditionAsTableIndex) {
TableSize = MaxCaseVal->getLimitedValue() + 1;
- else
+ TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
+ } else {
TableSize =
(MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+ TableIndexOffset = MinCaseVal;
+ }
+
// If the default destination is unreachable, or if the lookup table covers
// all values of the conditional variable, branch directly to the lookup table
// BB. Otherwise, check that the condition is within the case range.
+ uint64_t NumResults = ResultLists[PHIs[0]].size();
bool DefaultIsReachable = !SI->defaultDestUnreachable();
bool TableHasHoles = (NumResults < TableSize);
@@ -7025,68 +7034,100 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
- std::vector<DominatorTree::UpdateType> Updates;
-
- // Compute the maximum table size representable by the integer type we are
- // switching upon.
- unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
- uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
- assert(MaxTableSize >= TableSize &&
- "It is impossible for a switch to have more entries than the max "
- "representable value of its input integer type's size.");
-
- // Create the BB that does the lookups.
- Module &Mod = *CommonDest->getParent()->getParent();
- BasicBlock *LookupBB = BasicBlock::Create(
- Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
-
// Compute the table index value.
- Builder.SetInsertPoint(SI);
Value *TableIndex;
- ConstantInt *TableIndexOffset;
if (UseSwitchConditionAsTableIndex) {
- TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
TableIndex = SI->getCondition();
- } else {
- TableIndexOffset = MinCaseVal;
+ if (HasDefaultResults) {
+ // Grow the table to cover all possible index values to avoid the range
+ // check. It will use the default result to fill in the table hole later,
+ // so make sure it exist.
+ ConstantRange CR =
+ computeConstantRange(TableIndex, /* ForSigned */ false);
+ // Grow the table shouldn't have any size impact by checking
+ // wouldFitInRegister.
+ // TODO: Consider growing the table also when it doesn't fit in a register
+ // if no optsize is specified.
+ const uint64_t UpperBound = CR.getUpper().getLimitedValue();
+ if (!CR.isUpperWrapped() &&
+ all_of(ResultTypes, [&](const auto &ResultType) {
+ return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
+ ResultType);
+ })) {
+ // There may be some case index larger than the UpperBound (unreachable
+ // case), so make sure the table size does not get smaller.
+ TableSize = std::max(UpperBound, TableSize);
+ // The default branch is unreachable after we enlarge the lookup table.
+ // Adjust DefaultIsReachable to reuse code path.
+ DefaultIsReachable = false;
+ }
+ }
+ }
+
+ // Keep track of the switch replacement for each phi
+ SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
+ for (PHINode *PHI : PHIs) {
+ const auto &ResultList = ResultLists[PHI];
+
+ Type *ResultType = ResultList.begin()->second->getType();
+ // Use any value to fill the lookup table holes.
+ Constant *DefaultVal =
+ AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
+ StringRef FuncName = Fn->getName();
+ SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
+ ResultList, DefaultVal, DL, FuncName);
+ PhiToReplacementMap.insert({PHI, Replacement});
+ }
+
+ bool AnyLookupTables = any_of(
+ PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
+
+ // A few conditions prevent the generation of lookup tables:
+ // 1. The target does not support lookup tables.
+ // 2. The "no-jump-tables" function attribute is set.
+ // However, these objections do not apply to other switch replacements, like
+ // the bitmap, so we only stop here if any of these conditions are met and we
+ // want to create a LUT. Otherwise, continue with the switch replacement.
+ if (AnyLookupTables &&
+ (!TTI.shouldBuildLookupTables() ||
+ Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
+ return false;
+
+ Builder.SetInsertPoint(SI);
+ // TableIndex is the switch condition - TableIndexOffset if we don't
+ // use the condition directly
+ if (!UseSwitchConditionAsTableIndex) {
// If the default is unreachable, all case values are s>= MinCaseVal. Then
// we can try to attach nsw.
bool MayWrap = true;
if (!DefaultIsReachable) {
- APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
+ APInt Res =
+ MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
(void)Res;
}
-
TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
"switch.tableidx", /*HasNUW =*/false,
/*HasNSW =*/!MayWrap);
}
- BranchInst *RangeCheckBranch = nullptr;
+ std::vector<DominatorTree::UpdateType> Updates;
- // Grow the table to cover all possible index values to avoid the range check.
- // It will use the default result to fill in the table hole later, so make
- // sure it exist.
- if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
- ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
- // Grow the table shouldn't have any size impact by checking
- // wouldFitInRegister.
- // TODO: Consider growing the table also when it doesn't fit in a register
- // if no optsize is specified.
- const uint64_t UpperBound = CR.getUpper().getLimitedValue();
- if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
- return SwitchLookupTable::wouldFitInRegister(
- DL, UpperBound, KV.second /* ResultType */);
- })) {
- // There may be some case index larger than the UpperBound (unreachable
- // case), so make sure the table size does not get smaller.
- TableSize = std::max(UpperBound, TableSize);
- // The default branch is unreachable after we enlarge the lookup table.
- // Adjust DefaultIsReachable to reuse code path.
- DefaultIsReachable = false;
- }
- }
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(
+ Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
+
+ BranchInst *RangeCheckBranch = nullptr;
+ Builder.SetInsertPoint(SI);
const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
@@ -7157,25 +7198,16 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
for (PHINode *PHI : PHIs) {
const ResultListTy &ResultList = ResultLists[PHI];
-
- Type *ResultType = ResultList.begin()->second->getType();
-
- // Use any value to fill the lookup table holes.
- Constant *DV =
- AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
- StringRef FuncName = Fn->getName();
- SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
- DL, FuncName);
-
- Value *Result = Table.buildLookup(TableIndex, Builder, DL);
-
+ auto Replacement = PhiToReplacementMap.at(PHI);
+ auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
// Do a small peephole optimization: re-use the switch table compare if
// possible.
if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
BasicBlock *PhiBlock = PHI->getParent();
// Search for compare instructions which use the phi.
for (auto *User : PHI->users()) {
- reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
+ reuseTableCompare(User, PhiBlock, RangeCheckBranch,
+ Replacement.getDefaultValue(), ResultList);
}
}
@@ -7202,7 +7234,6 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (DTU)
DTU->applyUpdates(Updates);
- ++NumLookupTables;
if (NeedMask)
++NumLookupTablesHoles;
return true;
@@ -7708,7 +7739,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- switchToLookupTable(SI, Builder, DTU, DL, TTI))
+ simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 2d6a748f4507..8acebbaa5458 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -97,6 +97,10 @@ static cl::opt<unsigned, false, HotColdHintParser>
static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
"hot-new-hint-value", cl::Hidden, cl::init(254),
cl::desc("Value to pass to hot/cold operator new for hot allocation"));
+static cl::opt<unsigned, false, HotColdHintParser> AmbiguousNewHintValue(
+ "ambiguous-new-hint-value", cl::Hidden, cl::init(222),
+ cl::desc(
+ "Value to pass to hot/cold operator new for ambiguous allocation"));
//===----------------------------------------------------------------------===//
// Helper Functions
@@ -1719,6 +1723,37 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
return nullptr;
}
+// Allow existing calls to operator new() that takes a __hot_cold_t parameter to
+// be updated with a compiler-determined hot cold hint value. This is used in
+// cases where the call is marked nobuiltin (because operator new called
+// explicitly) and therefore cannot be replaced with a different callee.
+Value *LibCallSimplifier::optimizeExistingHotColdNew(CallInst *CI,
+ IRBuilderBase &B) {
+ if (!OptimizeHotColdNew || !OptimizeExistingHotColdNew)
+ return nullptr;
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ return nullptr;
+ LibFunc Func;
+ if (!TLI->getLibFunc(*Callee, Func))
+ return nullptr;
+ switch (Func) {
+ case LibFunc_Znwm12__hot_cold_t:
+ case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_Znam12__hot_cold_t:
+ case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_size_returning_new_hot_cold:
+ case LibFunc_size_returning_new_aligned_hot_cold:
+ return optimizeNew(CI, B, Func);
+ default:
+ return nullptr;
+ }
+}
+
// When enabled, replace operator new() calls marked with a hot or cold memprof
// attribute with an operator new() call that takes a __hot_cold_t parameter.
// Currently this is supported by the open source version of tcmalloc, see:
@@ -1736,6 +1771,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
HotCold = NotColdNewHintValue;
else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")
HotCold = HotNewHintValue;
+ else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() ==
+ "ambiguous")
+ HotCold = AmbiguousNewHintValue;
else
return nullptr;
@@ -1753,9 +1791,8 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_Znwm12__hot_cold_t, HotCold);
break;
case LibFunc_Znwm:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNew(CI->getArgOperand(0), B, TLI,
- LibFunc_Znwm12__hot_cold_t, HotCold);
+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,
+ LibFunc_Znwm12__hot_cold_t, HotCold);
break;
case LibFunc_Znam12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1763,9 +1800,8 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_Znam12__hot_cold_t, HotCold);
break;
case LibFunc_Znam:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNew(CI->getArgOperand(0), B, TLI,
- LibFunc_Znam12__hot_cold_t, HotCold);
+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,
+ LibFunc_Znam12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1774,10 +1810,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmRKSt9nothrow_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewNoThrow(
- CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
- LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t,
+ HotCold);
break;
case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1786,10 +1821,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamRKSt9nothrow_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewNoThrow(
- CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
- LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t,
+ HotCold);
break;
case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1798,10 +1832,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnwmSt11align_val_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewAligned(
- CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
- LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t,
+ HotCold);
break;
case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1810,10 +1843,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamSt11align_val_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewAligned(
- CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
- LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t,
+ HotCold);
break;
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1823,11 +1855,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
HotCold);
break;
case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewAlignedNoThrow(
- CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
- TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
- HotCold);
+ return emitHotColdNewAlignedNoThrow(
+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
+ TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
if (OptimizeExistingHotColdNew)
@@ -1837,17 +1867,14 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
HotCold);
break;
case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdNewAlignedNoThrow(
- CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
- TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
- HotCold);
+ return emitHotColdNewAlignedNoThrow(
+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
+ TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
break;
case LibFunc_size_returning_new:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
- LibFunc_size_returning_new_hot_cold,
- HotCold);
+ return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
+ LibFunc_size_returning_new_hot_cold,
+ HotCold);
break;
case LibFunc_size_returning_new_hot_cold:
if (OptimizeExistingHotColdNew)
@@ -1856,10 +1883,9 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
HotCold);
break;
case LibFunc_size_returning_new_aligned:
- if (HotCold != NotColdNewHintValue)
- return emitHotColdSizeReturningNewAligned(
- CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
- LibFunc_size_returning_new_aligned_hot_cold, HotCold);
+ return emitHotColdSizeReturningNewAligned(
+ CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
+ LibFunc_size_returning_new_aligned_hot_cold, HotCold);
break;
case LibFunc_size_returning_new_aligned_hot_cold:
if (OptimizeExistingHotColdNew)
@@ -4094,8 +4120,11 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
// TODO: Split out the code below that operates on FP calls so that
// we can all non-FP calls with the StrictFP attribute to be
// optimized.
- if (CI->isNoBuiltin())
- return nullptr;
+ if (CI->isNoBuiltin()) {
+ // If this is an existing call to a hot cold operator new, we can update the
+ // hint parameter value, which doesn't change the callee.
+ return optimizeExistingHotColdNew(CI, Builder);
+ }
LibFunc Func;
Function *Callee = CI->getCalledFunction();
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index d52d52a9b7d3..6319fd524ff0 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -349,13 +349,7 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
KeyValue = Key->getValue(KeyStorage);
if (KeyValue == "source") {
- std::string Error;
-
Source = std::string(Value->getValue(ValueStorage));
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
} else if (KeyValue == "target") {
Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue == "transform") {
@@ -379,12 +373,22 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
// TODO see if there is a more elegant solution to selecting the rewrite
// descriptor type
- if (!Target.empty())
+ if (!Target.empty()) {
DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
Source, Target, Naked));
- else
- DL->push_back(
- std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+ return true;
+ }
+
+ {
+ std::string Error;
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Descriptor, "invalid Source regex: " + Error);
+ return false;
+ }
+ }
+
+ DL->push_back(
+ std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
return true;
}
@@ -418,13 +422,7 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
KeyValue = Key->getValue(KeyStorage);
if (KeyValue == "source") {
- std::string Error;
-
Source = std::string(Value->getValue(ValueStorage));
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
} else if (KeyValue == "target") {
Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue == "transform") {
@@ -441,13 +439,23 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return false;
}
- if (!Target.empty())
+ if (!Target.empty()) {
DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
Source, Target,
/*Naked*/ false));
- else
- DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
- Source, Transform));
+ return true;
+ }
+
+ {
+ std::string Error;
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Descriptor, "invalid Source regex: " + Error);
+ return false;
+ }
+ }
+
+ DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ Source, Transform));
return true;
}
@@ -481,13 +489,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
KeyValue = Key->getValue(KeyStorage);
if (KeyValue == "source") {
- std::string Error;
-
Source = std::string(Value->getValue(ValueStorage));
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
} else if (KeyValue == "target") {
Target = std::string(Value->getValue(ValueStorage));
} else if (KeyValue == "transform") {
@@ -504,13 +506,23 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return false;
}
- if (!Target.empty())
+ if (!Target.empty()) {
DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
Source, Target,
/*Naked*/ false));
- else
- DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
- Source, Transform));
+ return true;
+ }
+
+ {
+ std::string Error;
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Descriptor, "invalid Source regex: " + Error);
+ return false;
+ }
+ }
+
+ DL->push_back(
+ std::make_unique<PatternRewriteNamedAliasDescriptor>(Source, Transform));
return true;
}