summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO
diff options
context:
space:
mode:
author: Oliver Hunt <oliver@apple.com> 2025-10-20 01:38:07 -0700
committer: GitHub <noreply@github.com> 2025-10-20 01:38:07 -0700
commit: 7de01aa5d0418bd4e8db2917f831e7383c6863bb (patch)
tree: 1db866f57c2236573cd4b4c2d141d6d420f87a92 /llvm/lib/Transforms/IPO
parent: 6bc540043d4c3fed8f44c8f6de86be0d1740582e (diff)
parent: 46a866ab7735aaa0f89fde209d516271c4825c49 (diff)
Merge branch 'main' into users/ojhunt/ptrauth-additions (branch: users/ojhunt/ptrauth-additions)
Diffstat (limited to 'llvm/lib/Transforms/IPO')
-rw-r--r-- llvm/lib/Transforms/IPO/FunctionAttrs.cpp 119
-rw-r--r-- llvm/lib/Transforms/IPO/FunctionImport.cpp 4
-rw-r--r-- llvm/lib/Transforms/IPO/FunctionSpecialization.cpp 11
-rw-r--r-- llvm/lib/Transforms/IPO/GlobalOpt.cpp 16
-rw-r--r-- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp 235
-rw-r--r-- llvm/lib/Transforms/IPO/PartialInlining.cpp 2
-rw-r--r-- llvm/lib/Transforms/IPO/SampleProfile.cpp 11
-rw-r--r-- llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp 4
-rw-r--r-- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp 12
9 files changed, 326 insertions, 88 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 8d9a0e7eaef6..50130da01c7b 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -2067,6 +2067,36 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
AI.run(SCCNodes, Changed);
}
+// Reports whether 'F' contains a call that might re-enter 'F', i.e. whether
+// it would be unsafe to mark 'F' as 'norecurse'.
+//
+// Returns true as soon as one potentially recursive call is found, and false
+// when every call in 'F' is known not to lead back to 'F'.
+//
+// 'AnyFunctionsAddressIsTaken' is a module-wide flag: it is true if any
+// function's address is taken, or if any function has external linkage. When
+// it is false, calls to external/library functions are treated as unable to
+// call back into the module. It defaults to true, the conservative setting.
+static bool mayHaveRecursiveCallee(Function &F,
+                                   bool AnyFunctionsAddressIsTaken = true) {
+  for (const auto &Block : F) {
+    for (const auto &Inst : Block.instructionsWithoutDebug()) {
+      const auto *Call = dyn_cast<CallBase>(&Inst);
+      if (!Call)
+        continue;
+
+      // No statically known callee (getCalledFunction() returned null), or a
+      // direct call to 'F' itself: assume recursion is possible.
+      const Function *Target = Call->getCalledFunction();
+      if (Target == nullptr || Target == &F)
+        return true;
+
+      // The call is harmless if the callee is already 'norecurse', if no
+      // function in the module can be reached from outside, or if the callee
+      // is a declaration marked 'nocallback'.
+      if (Target->doesNotRecurse() || !AnyFunctionsAddressIsTaken ||
+          (Target->isDeclaration() &&
+           Target->hasFnAttribute(Attribute::NoCallback)))
+        continue;
+
+      return true;
+    }
+  }
+  return false;
+}
+
static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
SmallPtrSet<Function *, 8> &Changed) {
// Try and identify functions that do not recurse.
@@ -2078,28 +2108,14 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return;
-
- // If all of the calls in F are identifiable and are to norecurse functions, F
- // is norecurse. This check also detects self-recursion as F is not currently
- // marked norecurse, so any called from F to F will not be marked norecurse.
- for (auto &BB : *F)
- for (auto &I : BB.instructionsWithoutDebug())
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee == F ||
- (!Callee->doesNotRecurse() &&
- !(Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::NoCallback))))
- // Function calls a potentially recursive function.
- return;
- }
-
- // Every call was to a non-recursive function other than this function, and
- // we have no indirect recursion as the SCC size is one. This function cannot
- // recurse.
- F->setDoesNotRecurse();
- ++NumNoRecurse;
- Changed.insert(F);
+ if (!mayHaveRecursiveCallee(*F)) {
+ // Every call was to a non-recursive function other than this function, and
+ // we have no indirect recursion as the SCC size is one. This function
+ // cannot recurse.
+ F->setDoesNotRecurse();
+ ++NumNoRecurse;
+ Changed.insert(F);
+ }
}
// Set the noreturn function attribute if possible.
@@ -2429,3 +2445,62 @@ ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<LazyCallGraphAnalysis>();
return PA;
}
+
+PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M,
+                                                 ModuleAnalysisManager &MAM) {
+  // Scan the module once for any defined, not-yet-norecurse function that
+  // either has non-local linkage or has its address taken. If there is no
+  // such function, nothing outside the module can call back into user code,
+  // which lets the per-function check below ignore external/library calls.
+  bool AnyFunctionsAddressIsTaken = false;
+  for (Function &Candidate : M) {
+    const bool Relevant =
+        !Candidate.isDeclaration() && !Candidate.doesNotRecurse();
+    if (Relevant &&
+        (!Candidate.hasLocalLinkage() || Candidate.hasAddressTaken())) {
+      AnyFunctionsAddressIsTaken = true;
+      break;
+    }
+  }
+
+  // Walk every RefSCC of the module's LazyCallGraph in post-order and try to
+  // infer norecurse for the functions that stand alone.
+  LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M);
+  CG.buildRefSCCs();
+
+  bool Changed = false;
+  for (LazyCallGraph::RefSCC &RefComponent : CG.postorder_ref_sccs()) {
+    // More than one SCC inside the RefSCC indicates a recursive relationship
+    // involving indirect calls; leave those alone.
+    if (RefComponent.size() > 1)
+      continue;
+
+    // More than one function inside the single SCC means mutual recursion,
+    // e.g. foo1 -> foo2 -> foo3 -> foo1.
+    LazyCallGraph::SCC &Component = *RefComponent.begin();
+    if (Component.size() > 1)
+      continue;
+
+    // Exactly one function remains; it must have an exact definition and not
+    // already carry the attribute.
+    Function &F = Component.begin()->getFunction();
+    const bool Eligible = F.hasExactDefinition() && !F.doesNotRecurse();
+
+    // Mark the function only when no call in it (direct, indirect, or through
+    // external linkage) may lead back to it.
+    if (Eligible && !mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) {
+      F.setDoesNotRecurse();
+      ++NumNoRecurse;
+      Changed = true;
+    }
+  }
+
+  // Nothing touched: every analysis stays valid.
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  // Attributes changed: only the lazy call graph is reported as preserved.
+  PreservedAnalyses PA;
+  PA.preserve<LazyCallGraphAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 83aa7de5400f..28ee4449421b 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -72,6 +72,7 @@ STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
+namespace llvm {
cl::opt<bool>
ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
cl::desc("Import functions with noinline attribute"));
@@ -185,9 +186,8 @@ static cl::opt<bool> CtxprofMoveRootsToOwnModule(
extern cl::list<GlobalValue::GUID> MoveSymbolGUID;
-namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
-}
+} // end namespace llvm
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 4f5373846f43..150a2dc5d48e 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -28,10 +28,13 @@ using namespace llvm;
STATISTIC(NumSpecsCreated, "Number of specializations created");
+namespace llvm {
+
static cl::opt<bool> ForceSpecialization(
- "force-specialization", cl::init(false), cl::Hidden, cl::desc(
- "Force function specialization for every call site with a constant "
- "argument"));
+ "force-specialization", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Force function specialization for every call site with a constant "
+ "argument"));
static cl::opt<unsigned> MaxClones(
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
@@ -91,6 +94,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
BasicBlock *Succ) const {
unsigned I = 0;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index f88d51f443bc..99c4982c58b4 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV,
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users())
- cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ if (Call->getCalledOperand() == F)
+ Call->setCallingConv(CallingConv::Fast);
}
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
@@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F,
return false;
for (User *U : F.users()) {
- CallBase &CB = cast<CallBase>(*U);
- Function *CallerFunc = CB.getParent()->getParent();
+ CallBase *CB = dyn_cast<CallBase>(U);
+ if (!CB || CB->getCalledOperand() != &F)
+ continue;
+ Function *CallerFunc = CB->getParent()->getParent();
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
- if (!isColdCallSite(CB, CallerBFI))
+ if (!isColdCallSite(*CB, CallerBFI))
return false;
if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
@@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F,
static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users())
- cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ if (Call->getCalledOperand() == F)
+ Call->setCallingConv(CallingConv::Cold);
}
// This function iterates over all the call instructions in the input Function
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 15f4d76300bf..894d83fa530b 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
+STATISTIC(
+ FunctionCloneDuplicatesThinBackend,
+ "Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
@@ -214,11 +219,12 @@ static cl::opt<bool> MemProfRequireDefinitionForPromotion(
"memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
cl::desc(
"Require target function definition when promoting indirect calls"));
-} // namespace llvm
extern cl::opt<bool> MemProfReportHintedSizes;
extern cl::opt<unsigned> MinClonedColdBytePercent;
+} // namespace llvm
+
namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
@@ -1027,19 +1033,17 @@ private:
};
} // namespace
-namespace llvm {
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
-struct DenseMapInfo<IndexCall>
+struct llvm::DenseMapInfo<IndexCall>
: public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
-} // end namespace llvm
namespace {
@@ -5186,19 +5190,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}
+// Compute a SHA1-based hash of the callsite and alloc version information of
+// clone I in the summary, to use in detection of duplicate clones.
+//
+// FS is the summary of the function being cloned and I is the clone number
+// (0 is the original copy). The result is the first 8 bytes of the SHA1
+// digest, read as a little-endian 64-bit value.
+//
+// Every callsite and every alloc contributes exactly one fixed-width record,
+// in summary order, including those that use the default callee version (0)
+// or the default hint (None). Skipping the default entries would drop the
+// positional information: two clones that redirect *different* callsites to
+// the same non-zero callee version would hash identically and be wrongly
+// treated as duplicates of each other.
+uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
+  SHA1 Hasher;
+  // Update hash with the callee version used by each callsite.
+  for (auto &SN : FS->callsites()) {
+    // In theory all callsites and allocs in this function should have the same
+    // number of clone entries, but handle any discrepancies gracefully below
+    // for NDEBUG builds by treating a missing entry as the default version.
+    assert(
+        SN.Clones.size() > I &&
+        "Callsite summary has fewer entries than other summaries in function");
+    const uint32_t CalleeVersion = SN.Clones.size() > I ? SN.Clones[I] : 0;
+    uint8_t Data[sizeof(CalleeVersion)];
+    support::endian::write32le(Data, CalleeVersion);
+    Hasher.update(Data);
+  }
+  // Update hash with the hint assigned to each alloc.
+  for (auto &AN : FS->allocs()) {
+    // In theory all callsites and allocs in this function should have the same
+    // number of clone entries, but handle any discrepancies gracefully below
+    // for NDEBUG builds by treating a missing entry as the default hint.
+    assert(AN.Versions.size() > I &&
+           "Alloc summary has fewer entries than other summaries in function");
+    const uint8_t Hint = AN.Versions.size() > I
+                             ? AN.Versions[I]
+                             : (uint8_t)AllocationType::None;
+    Hasher.update(ArrayRef<uint8_t>(&Hint, 1));
+  }
+  return support::endian::read64le(Hasher.result().data());
+}
+
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
- &FuncToAliasMap) {
+ &FuncToAliasMap,
+ FunctionSummary *FS) {
+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
+ // We might have created this when adjusting callsite in another
+ // function. It should be a declaration.
+ assert(DeclGV->isDeclaration());
+ NewGV->takeName(DeclGV);
+ DeclGV->replaceAllUsesWith(NewGV);
+ DeclGV->eraseFromParent();
+ };
+
+ // Handle aliases to this function, and create analogous alias clones to the
+ // provided clone of this function.
+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
+ if (!FuncToAliasMap.count(&F))
+ return;
+ for (auto *A : FuncToAliasMap[&F]) {
+ std::string AliasName = getMemProfFuncName(A->getName(), I);
+ auto *PrevA = M.getNamedAlias(AliasName);
+ auto *NewA = GlobalAlias::create(A->getValueType(),
+ A->getType()->getPointerAddressSpace(),
+ A->getLinkage(), AliasName, NewF);
+ NewA->copyAttributesFrom(A);
+ if (PrevA)
+ TakeDeclNameAndReplace(PrevA, NewA);
+ }
+ };
+
// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;
+
+ // Map of hash of callsite/alloc versions to the instantiated function clone
+ // (possibly the original) implementing those calls. Used to avoid
+ // instantiating duplicate function clones.
+ // FIXME: Ideally the thin link would not generate such duplicate clones to
+ // start with, but right now it happens due to phase ordering in the function
+ // assignment and possible new clones that produces. We simply make each
+ // duplicate an alias to the matching instantiated clone recorded in the map
+ // (except for available_externally which are made declarations as they would
+ // be aliases in the prevailing module, and available_externally aliases are
+ // not well supported right now).
+ DenseMap<uint64_t, Function *> HashToFunc;
+
+ // Save the hash of the original function version.
+ HashToFunc[ComputeHash(FS, 0)] = &F;
+
for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
+ std::string Name = getMemProfFuncName(F.getName(), I);
+ auto Hash = ComputeHash(FS, I);
+ // If this clone would duplicate a previously seen clone, don't generate the
+ // duplicate clone body, just make an alias to satisfy any (potentially
+ // cross-module) references.
+ if (HashToFunc.contains(Hash)) {
+ FunctionCloneDuplicatesThinBackend++;
+ auto *Func = HashToFunc[Hash];
+ if (Func->hasAvailableExternallyLinkage()) {
+ // Skip these as EliminateAvailableExternallyPass does not handle
+ // available_externally aliases correctly and we end up with an
+ // available_externally alias to a declaration. Just create a
+ // declaration for now as we know we will have a definition in another
+ // module.
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
+ continue;
+ }
+ auto *PrevF = M.getFunction(Name);
+ auto *Alias = GlobalAlias::create(Name, Func);
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, Alias);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone alias " << ore::NV("Alias", Alias));
+
+ // Now handle aliases to this function, and clone those as well.
+ CloneFuncAliases(Func, I);
+ continue;
+ }
auto *NewF = CloneFunction(&F, *VMaps.back());
+ HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
@@ -5208,40 +5320,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
- std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
- if (PrevF) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevF->isDeclaration());
- NewF->takeName(PrevF);
- PrevF->replaceAllUsesWith(NewF);
- PrevF->eraseFromParent();
- } else
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, NewF);
+ else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));
// Now handle aliases to this function, and clone those as well.
- if (!FuncToAliasMap.count(&F))
- continue;
- for (auto *A : FuncToAliasMap[&F]) {
- std::string Name = getMemProfFuncName(A->getName(), I);
- auto *PrevA = M.getNamedAlias(Name);
- auto *NewA = GlobalAlias::create(A->getValueType(),
- A->getType()->getPointerAddressSpace(),
- A->getLinkage(), Name, NewF);
- NewA->copyAttributesFrom(A);
- if (PrevA) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevA->isDeclaration());
- NewA->takeName(PrevA);
- PrevA->replaceAllUsesWith(NewA);
- PrevA->eraseFromParent();
- }
- }
+ CloneFuncAliases(NewF, I);
}
return VMaps;
}
@@ -5401,7 +5490,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0 which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
@@ -5415,7 +5504,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
- VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
+ VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
@@ -5424,9 +5513,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
- Function *CalledFunction) {
+ Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);
assert(!isMemProfClone(*CalledFunction));
@@ -5448,6 +5537,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
@@ -5567,9 +5660,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
// Include allocs that were already assigned a memprof function
- // attribute in the statistics.
- if (CB->getAttributes().hasFnAttr("memprof")) {
- assert(!MemProfMD);
+ // attribute in the statistics. Only do this for those that do not have
+ // memprof metadata, since we add an "ambiguous" memprof attribute by
+ // default.
+ if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) {
CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
? AllocTypeColdThinBackend++
: AllocTypeNotColdThinBackend++;
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
+ CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);
OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and
+ // was created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
@@ -5671,7 +5769,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
- CloneFuncIfNeeded(NumClones);
+ CloneFuncIfNeeded(NumClones, FS);
}
else {
@@ -5691,7 +5789,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif
- CloneCallsite(StackNode, CB, CalledFunction);
+ CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5701,7 +5799,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
- CloneCallsite(Callsite->second, CB, CalledFunction);
+ CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
@@ -5847,6 +5945,10 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -5892,6 +5994,10 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -6044,3 +6150,42 @@ void MemProfContextDisambiguation::run(
IndexCallsiteContextGraph CCG(Index, isPrevailing);
CCG.process();
}
+
+// Removes MemProf attributes and metadata from every call in the module. Can
+// be invoked by the pass pipeline when we don't have an index that has
+// recorded that we are linking with allocation libraries containing the
+// necessary APIs for downstream transformations.
+//
+// Returns PreservedAnalyses::all() when nothing was removed, and
+// PreservedAnalyses::none() otherwise.
+PreservedAnalyses MemProfRemoveInfo::run(Module &M, ModuleAnalysisManager &AM) {
+  // The profile matcher applies hotness attributes directly for allocations,
+  // and those will cause us to generate calls to the hot/cold interfaces
+  // unconditionally. If supports-hot-cold-new was not enabled in the LTO
+  // link then assume we don't want these calls (e.g. not linking with
+  // the appropriate library, or otherwise trying to disable this behavior).
+  bool Modified = false;
+  for (auto &Func : M) {
+    for (auto &Block : Func) {
+      for (auto &Inst : Block) {
+        auto *Call = dyn_cast<CallBase>(&Inst);
+        if (!Call)
+          continue;
+
+        // Drop the "memprof" function attribute from the call, if present.
+        if (Call->hasFnAttr("memprof")) {
+          Call->removeFnAttr("memprof");
+          Modified = true;
+        }
+
+        if (Call->hasMetadata(LLVMContext::MD_callsite)) {
+          // Strip off all memprof metadata as it is no longer needed.
+          // Importantly, this avoids the addition of new memprof attributes
+          // after inlining propagation.
+          Call->setMetadata(LLVMContext::MD_memprof, nullptr);
+          Call->setMetadata(LLVMContext::MD_callsite, nullptr);
+          Modified = true;
+        } else {
+          // Memprof metadata is only expected alongside callsite metadata.
+          assert(!Call->hasMetadata(LLVMContext::MD_memprof));
+        }
+      }
+    }
+  }
+  return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2583249e6548..1a00d173d3ae 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
"outline candidate and original function"));
// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
-static cl::opt<unsigned>
+cl::opt<unsigned>
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
cl::desc("Minimum block executions to consider "
"its BranchProbabilityInfo valid"));
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 5bc7e3493812..e39e311dd795 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -116,6 +116,8 @@ STATISTIC(
NumCSInlinedHitGrowthLimit,
"Number of functions with FDO inline stopped due to growth size limit");
+namespace llvm {
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -198,7 +200,6 @@ static cl::opt<bool> DisableSampleLoaderInlining(
"pass, and merge (or scale) profiles (as configured by "
"--sample-profile-merge-inlinee)."));
-namespace llvm {
cl::opt<bool>
SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
cl::desc("Sort profiled recursion by edge weights."));
@@ -1664,8 +1665,9 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
else if (OverwriteExistingWeights)
I.setMetadata(LLVMContext::MD_prof, nullptr);
} else if (!isa<IntrinsicInst>(&I)) {
- setBranchWeights(I, {static_cast<uint32_t>(BlockWeights[BB])},
- /*IsExpected=*/false);
+ setBranchWeights(
+ I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])},
+ /*IsExpected=*/false);
}
}
} else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
@@ -1676,7 +1678,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
if (cast<CallBase>(I).isIndirectCall()) {
I.setMetadata(LLVMContext::MD_prof, nullptr);
} else {
- setBranchWeights(I, {uint32_t(0)}, /*IsExpected=*/false);
+ setBranchWeights(I, ArrayRef<uint32_t>{uint32_t(0)},
+ /*IsExpected=*/false);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 093a39eb4b5d..70b861482682 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -23,6 +23,8 @@ using namespace sampleprof;
#define DEBUG_TYPE "sample-profile-matcher"
+namespace llvm {
+
static cl::opt<unsigned> FuncProfileSimilarityThreshold(
"func-profile-similarity-threshold", cl::Hidden, cl::init(80),
cl::desc("Consider a profile matches a function if the similarity of their "
@@ -55,6 +57,8 @@ static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
cl::desc("The maximum number of callsites in a function, above which stale "
"profile matching will be skipped."));
+} // end namespace llvm
+
void SampleProfileMatcher::findIRAnchors(const Function &F,
AnchorMap &IRAnchors) const {
// For inlined code, recover the original callsite and callee by finding the
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 09bffa7bf584..2d5cb8268ffd 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -120,6 +120,8 @@ STATISTIC(NumVirtConstProp1Bit,
"Number of 1 bit virtual constant propagations");
STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
+namespace llvm {
+
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
@@ -175,6 +177,8 @@ static cl::list<std::string>
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
/// With Clang, a pure virtual class's deleting destructor is emitted as a
/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
/// context of whole program devirtualization, the deleting destructor of a pure
@@ -368,9 +372,7 @@ struct VTableSlot {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<VTableSlot> {
+template <> struct llvm::DenseMapInfo<VTableSlot> {
static VTableSlot getEmptyKey() {
return {DenseMapInfo<Metadata *>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -389,7 +391,7 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
-template <> struct DenseMapInfo<VTableSlotSummary> {
+template <> struct llvm::DenseMapInfo<VTableSlotSummary> {
static VTableSlotSummary getEmptyKey() {
return {DenseMapInfo<StringRef>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -408,8 +410,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> {
}
};
-} // end namespace llvm
-
// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable in the following