diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO')
| -rw-r--r-- | llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 119 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/FunctionImport.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/GlobalOpt.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 235 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/PartialInlining.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 12 |
9 files changed, 326 insertions, 88 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8d9a0e7eaef6..50130da01c7b 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -2067,6 +2067,36 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, AI.run(SCCNodes, Changed); } +// Determines if the function 'F' can be marked 'norecurse'. +// It returns true if any call within 'F' could lead to a recursive +// call back to 'F', and false otherwise. +// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag +// that is true if any function's address is taken, or if any function +// has external linkage. This is used to determine the safety of +// external/library calls. +static bool mayHaveRecursiveCallee(Function &F, + bool AnyFunctionsAddressIsTaken = true) { + for (const auto &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + if (const auto *CB = dyn_cast<CallBase>(&I)) { + const Function *Callee = CB->getCalledFunction(); + if (!Callee || Callee == &F) + return true; + + if (Callee->doesNotRecurse()) + continue; + + if (!AnyFunctionsAddressIsTaken || + (Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback))) + continue; + return true; + } + } + } + return false; +} + static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed) { // Try and identify functions that do not recurse. @@ -2078,28 +2108,14 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, Function *F = *SCCNodes.begin(); if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) return; - - // If all of the calls in F are identifiable and are to norecurse functions, F - // is norecurse. This check also detects self-recursion as F is not currently - // marked norecurse, so any called from F to F will not be marked norecurse. - for (auto &BB : *F) - for (auto &I : BB.instructionsWithoutDebug()) - if (auto *CB = dyn_cast<CallBase>(&I)) { - Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || - (!Callee->doesNotRecurse() && - !(Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::NoCallback)))) - // Function calls a potentially recursive function. - return; - } - - // Every call was to a non-recursive function other than this function, and - // we have no indirect recursion as the SCC size is one. This function cannot - // recurse. - F->setDoesNotRecurse(); - ++NumNoRecurse; - Changed.insert(F); + if (!mayHaveRecursiveCallee(*F)) { + // Every call was to a non-recursive function other than this function, and + // we have no indirect recursion as the SCC size is one. This function + // cannot recurse. + F->setDoesNotRecurse(); + ++NumNoRecurse; + Changed.insert(F); + } } // Set the noreturn function attribute if possible. @@ -2429,3 +2445,62 @@ ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { PA.preserve<LazyCallGraphAnalysis>(); return PA; } + +PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M, + ModuleAnalysisManager &MAM) { + + // Check if any function in the whole program has its address taken or has + // potentially external linkage. + // We use this information when inferring norecurse attribute: If there is + // no function whose address is taken and all functions have internal + // linkage, there is no path for a callback to any user function. + bool AnyFunctionsAddressIsTaken = false; + for (Function &F : M) { + if (F.isDeclaration() || F.doesNotRecurse()) + continue; + if (!F.hasLocalLinkage() || F.hasAddressTaken()) { + AnyFunctionsAddressIsTaken = true; + break; + } + } + + // Run norecurse inference on all RefSCCs in the LazyCallGraph for this + // module. + bool Changed = false; + LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M); + CG.buildRefSCCs(); + + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + // Skip any RefSCC that is part of a call cycle. A RefSCC containing more + // than one SCC indicates a recursive relationship involving indirect calls. + if (RC.size() > 1) + continue; + + // RefSCC contains a single-SCC. SCC size > 1 indicates mutually recursive + // functions. Ex: foo1 -> foo2 -> foo3 -> foo1. + LazyCallGraph::SCC &S = *RC.begin(); + if (S.size() > 1) + continue; + + // Get the single function from this SCC. + Function &F = S.begin()->getFunction(); + if (!F.hasExactDefinition() || F.doesNotRecurse()) + continue; + + // If the analysis confirms that this function has no recursive calls + // (either direct, indirect, or through external linkages), + // we can safely apply the norecurse attribute. + if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) { + F.setDoesNotRecurse(); + ++NumNoRecurse; + Changed = true; + } + } + + PreservedAnalyses PA; + if (Changed) + PA.preserve<LazyCallGraphAnalysis>(); + else + PA = PreservedAnalyses::all(); + return PA; +} diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 83aa7de5400f..28ee4449421b 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -72,6 +72,7 @@ STATISTIC(NumImportedModules, "Number of modules imported from"); STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index"); STATISTIC(NumLiveSymbols, "Number of live symbols in index"); +namespace llvm { cl::opt<bool> ForceImportAll("force-import-all", cl::init(false), cl::Hidden, cl::desc("Import functions with noinline attribute")); @@ -185,9 +186,8 @@ static cl::opt<bool> CtxprofMoveRootsToOwnModule( extern cl::list<GlobalValue::GUID> MoveSymbolGUID; -namespace llvm { extern cl::opt<bool> EnableMemProfContextDisambiguation; -} +} // end namespace llvm // Load lazily a module from \p FileName in \p Context. static std::unique_ptr<Module> loadFile(const std::string &FileName, diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 4f5373846f43..150a2dc5d48e 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -28,10 +28,13 @@ using namespace llvm; STATISTIC(NumSpecsCreated, "Number of specializations created"); +namespace llvm { + static cl::opt<bool> ForceSpecialization( - "force-specialization", cl::init(false), cl::Hidden, cl::desc( - "Force function specialization for every call site with a constant " - "argument")); + "force-specialization", cl::init(false), cl::Hidden, + cl::desc( + "Force function specialization for every call site with a constant " + "argument")); static cl::opt<unsigned> MaxClones( "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc( @@ -91,6 +94,8 @@ static cl::opt<bool> SpecializeLiteralConstant( extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // end namespace llvm + bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const { unsigned I = 0; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index f88d51f443bc..99c4982c58b4 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV, /// FastCC. static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) - cast<CallBase>(U)->setCallingConv(CallingConv::Fast); + if (auto *Call = dyn_cast<CallBase>(U)) + if (Call->getCalledOperand() == F) + Call->setCallingConv(CallingConv::Fast); } static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, @@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F, return false; for (User *U : F.users()) { - CallBase &CB = cast<CallBase>(*U); - Function *CallerFunc = CB.getParent()->getParent(); + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB || CB->getCalledOperand() != &F) + continue; + Function *CallerFunc = CB->getParent()->getParent(); BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); - if (!isColdCallSite(CB, CallerBFI)) + if (!isColdCallSite(*CB, CallerBFI)) return false; if (!llvm::is_contained(AllCallsCold, CallerFunc)) return false; @@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F, static void changeCallSitesToColdCC(Function *F) { for (User *U : F->users()) - cast<CallBase>(U)->setCallingConv(CallingConv::Cold); + if (auto *Call = dyn_cast<CallBase>(U)) + if (Call->getCalledOperand() == F) + Call->setCallingConv(CallingConv::Cold); } // This function iterates over all the call instructions in the input Function diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 15f4d76300bf..894d83fa530b 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend, "Number of function clones created during ThinLTO backend"); STATISTIC(FunctionsClonedThinBackend, "Number of functions that had clones created during ThinLTO backend"); +STATISTIC( + FunctionCloneDuplicatesThinBackend, + "Number of function clone duplicates detected during ThinLTO backend"); STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " "cloned) during whole program analysis"); STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " @@ -214,11 +219,12 @@ static cl::opt<bool> MemProfRequireDefinitionForPromotion( "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc( "Require target function definition when promoting indirect calls")); -} // namespace llvm extern cl::opt<bool> MemProfReportHintedSizes; extern cl::opt<unsigned> MinClonedColdBytePercent; +} // namespace llvm + namespace { /// CRTP base for graphs built from either IR or ThinLTO summary index. /// @@ -1027,19 +1033,17 @@ private: }; } // namespace -namespace llvm { template <> -struct DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<typename CallsiteContextGraph< ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; template <> -struct DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<typename CallsiteContextGraph< IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; template <> -struct DenseMapInfo<IndexCall> +struct llvm::DenseMapInfo<IndexCall> : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; -} // end namespace llvm namespace { @@ -5186,19 +5190,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { return Changed; } +// Compute a SHA1 hash of the callsite and alloc version information of clone I +// in the summary, to use in detection of duplicate clones. +uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) { + SHA1 Hasher; + // Update hash with any callsites that call non-default (non-zero) callee + // versions. + for (auto &SN : FS->callsites()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert( + SN.Clones.size() > I && + "Callsite summary has fewer entries than other summaries in function"); + if (SN.Clones.size() <= I || !SN.Clones[I]) + continue; + uint8_t Data[sizeof(SN.Clones[I])]; + support::endian::write32le(Data, SN.Clones[I]); + Hasher.update(Data); + } + // Update hash with any allocs that have non-default (non-None) hints. + for (auto &AN : FS->allocs()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert(AN.Versions.size() > I && + "Alloc summary has fewer entries than other summaries in function"); + if (AN.Versions.size() <= I || + (AllocationType)AN.Versions[I] == AllocationType::None) + continue; + Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1)); + } + return support::endian::read64le(Hasher.result().data()); +} + static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> - &FuncToAliasMap) { + &FuncToAliasMap, + FunctionSummary *FS) { + auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) { + // We might have created this when adjusting callsite in another + // function. It should be a declaration. + assert(DeclGV->isDeclaration()); + NewGV->takeName(DeclGV); + DeclGV->replaceAllUsesWith(NewGV); + DeclGV->eraseFromParent(); + }; + + // Handle aliases to this function, and create analogous alias clones to the + // provided clone of this function. + auto CloneFuncAliases = [&](Function *NewF, unsigned I) { + if (!FuncToAliasMap.count(&F)) + return; + for (auto *A : FuncToAliasMap[&F]) { + std::string AliasName = getMemProfFuncName(A->getName(), I); + auto *PrevA = M.getNamedAlias(AliasName); + auto *NewA = GlobalAlias::create(A->getValueType(), + A->getType()->getPointerAddressSpace(), + A->getLinkage(), AliasName, NewF); + NewA->copyAttributesFrom(A); + if (PrevA) + TakeDeclNameAndReplace(PrevA, NewA); + } + }; + // The first "clone" is the original copy, we should only call this if we // needed to create new clones. assert(NumClones > 1); SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; VMaps.reserve(NumClones - 1); FunctionsClonedThinBackend++; + + // Map of hash of callsite/alloc versions to the instantiated function clone + // (possibly the original) implementing those calls. Used to avoid + // instantiating duplicate function clones. + // FIXME: Ideally the thin link would not generate such duplicate clones to + // start with, but right now it happens due to phase ordering in the function + // assignment and possible new clones that produces. We simply make each + // duplicate an alias to the matching instantiated clone recorded in the map + // (except for available_externally which are made declarations as they would + // be aliases in the prevailing module, and available_externally aliases are + // not well supported right now). + DenseMap<uint64_t, Function *> HashToFunc; + + // Save the hash of the original function version. + HashToFunc[ComputeHash(FS, 0)] = &F; + for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); + std::string Name = getMemProfFuncName(F.getName(), I); + auto Hash = ComputeHash(FS, I); + // If this clone would duplicate a previously seen clone, don't generate the + // duplicate clone body, just make an alias to satisfy any (potentially + // cross-module) references. + if (HashToFunc.contains(Hash)) { + FunctionCloneDuplicatesThinBackend++; + auto *Func = HashToFunc[Hash]; + if (Func->hasAvailableExternallyLinkage()) { + // Skip these as EliminateAvailableExternallyPass does not handle + // available_externally aliases correctly and we end up with an + // available_externally alias to a declaration. Just create a + // declaration for now as we know we will have a definition in another + // module. + auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone decl " << ore::NV("Decl", Decl.getCallee())); + continue; + } + auto *PrevF = M.getFunction(Name); + auto *Alias = GlobalAlias::create(Name, Func); + if (PrevF) + TakeDeclNameAndReplace(PrevF, Alias); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone alias " << ore::NV("Alias", Alias)); + + // Now handle aliases to this function, and clone those as well. + CloneFuncAliases(Func, I); + continue; + } auto *NewF = CloneFunction(&F, *VMaps.back()); + HashToFunc[Hash] = NewF; FunctionClonesThinBackend++; // Strip memprof and callsite metadata from clone as they are no longer // needed. @@ -5208,40 +5320,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Inst.setMetadata(LLVMContext::MD_callsite, nullptr); } } - std::string Name = getMemProfFuncName(F.getName(), I); auto *PrevF = M.getFunction(Name); - if (PrevF) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevF->isDeclaration()); - NewF->takeName(PrevF); - PrevF->replaceAllUsesWith(NewF); - PrevF->eraseFromParent(); - } else + if (PrevF) + TakeDeclNameAndReplace(PrevF, NewF); + else NewF->setName(Name); updateSubprogramLinkageName(NewF, Name); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) << "created clone " << ore::NV("NewFunction", NewF)); // Now handle aliases to this function, and clone those as well. - if (!FuncToAliasMap.count(&F)) - continue; - for (auto *A : FuncToAliasMap[&F]) { - std::string Name = getMemProfFuncName(A->getName(), I); - auto *PrevA = M.getNamedAlias(Name); - auto *NewA = GlobalAlias::create(A->getValueType(), - A->getType()->getPointerAddressSpace(), - A->getLinkage(), Name, NewF); - NewA->copyAttributesFrom(A); - if (PrevA) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevA->isDeclaration()); - NewA->takeName(PrevA); - PrevA->replaceAllUsesWith(NewA); - PrevA->eraseFromParent(); - } - } + CloneFuncAliases(NewF, I); } return VMaps; } @@ -5401,7 +5490,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; bool ClonesCreated = false; unsigned NumClonesCreated = 0; - auto CloneFuncIfNeeded = [&](unsigned NumClones) { + auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) { // We should at least have version 0 which is the original copy. assert(NumClones > 0); // If only one copy needed use original. @@ -5415,7 +5504,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(NumClonesCreated == NumClones); return; } - VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); + VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS); // The first "clone" is the original copy, which doesn't have a VMap. assert(VMaps.size() == NumClones - 1); Changed = true; @@ -5424,9 +5513,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { }; auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, - Function *CalledFunction) { + Function *CalledFunction, FunctionSummary *FS) { // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS); assert(!isMemProfClone(*CalledFunction)); @@ -5448,6 +5537,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Do nothing if this version calls the original version of its // callee. if (!StackNode.Clones[J]) @@ -5567,9 +5660,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); // Include allocs that were already assigned a memprof function - // attribute in the statistics. - if (CB->getAttributes().hasFnAttr("memprof")) { - assert(!MemProfMD); + // attribute in the statistics. Only do this for those that do not have + // memprof metadata, since we add an "ambiguous" memprof attribute by + // default. + if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) { CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold" ? AllocTypeColdThinBackend++ : AllocTypeNotColdThinBackend++; @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { #endif // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); + CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS); OrigAllocsThinBackend++; AllocVersionsThinBackend += AllocNode.Versions.size(); @@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and + // was created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Ignore any that didn't get an assigned allocation type. if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) continue; @@ -5671,7 +5769,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // we don't need to do ICP, but might need to clone this // function as it is the target of other cloned calls. if (NumClones) - CloneFuncIfNeeded(NumClones); + CloneFuncIfNeeded(NumClones, FS); } else { @@ -5691,7 +5789,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - CloneCallsite(StackNode, CB, CalledFunction); + CloneCallsite(StackNode, CB, CalledFunction, FS); } } else if (CB->isTailCall() && CalledFunction) { // Locate the synthesized callsite info for the callee VI, if any was @@ -5701,7 +5799,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); - CloneCallsite(Callsite->second, CB, CalledFunction); + CloneCallsite(Callsite->second, CB, CalledFunction, FS); } } } @@ -5847,6 +5945,10 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); @@ -5892,6 +5994,10 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); @@ -6044,3 +6150,42 @@ void MemProfContextDisambiguation::run( IndexCallsiteContextGraph CCG(Index, isPrevailing); CCG.process(); } + +// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline +// when we don't have an index that has recorded that we are linking with +// allocation libraries containing the necessary APIs for downstream +// transformations. +PreservedAnalyses MemProfRemoveInfo::run(Module &M, ModuleAnalysisManager &AM) { + // The profile matcher applies hotness attributes directly for allocations, + // and those will cause us to generate calls to the hot/cold interfaces + // unconditionally. If supports-hot-cold-new was not enabled in the LTO + // link then assume we don't want these calls (e.g. not linking with + // the appropriate library, or otherwise trying to disable this behavior). + bool Changed = false; + for (auto &F : M) { + for (auto &BB : F) { + for (auto &I : BB) { + auto *CI = dyn_cast<CallBase>(&I); + if (!CI) + continue; + if (CI->hasFnAttr("memprof")) { + CI->removeFnAttr("memprof"); + Changed = true; + } + if (!CI->hasMetadata(LLVMContext::MD_callsite)) { + assert(!CI->hasMetadata(LLVMContext::MD_memprof)); + continue; + } + // Strip off all memprof metadata as it is no longer needed. + // Importantly, this avoids the addition of new memprof attributes + // after inlining propagation. + CI->setMetadata(LLVMContext::MD_memprof, nullptr); + CI->setMetadata(LLVMContext::MD_callsite, nullptr); + Changed = true; + } + } + } + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2583249e6548..1a00d173d3ae 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio( "outline candidate and original function")); // Used to tune the minimum number of execution counts needed in the predecessor // block to the cold edge. ie. confidence interval. -static cl::opt<unsigned> +cl::opt<unsigned> MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid")); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 5bc7e3493812..e39e311dd795 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -116,6 +116,8 @@ STATISTIC( NumCSInlinedHitGrowthLimit, "Number of functions with FDO inline stopped due to growth size limit"); +namespace llvm { + // Command line option to specify the file to read samples from. This is // mainly used for debugging. static cl::opt<std::string> SampleProfileFile( @@ -198,7 +200,6 @@ static cl::opt<bool> DisableSampleLoaderInlining( "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee).")); -namespace llvm { cl::opt<bool> SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights.")); @@ -1664,8 +1665,9 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { else if (OverwriteExistingWeights) I.setMetadata(LLVMContext::MD_prof, nullptr); } else if (!isa<IntrinsicInst>(&I)) { - setBranchWeights(I, {static_cast<uint32_t>(BlockWeights[BB])}, - /*IsExpected=*/false); + setBranchWeights( + I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])}, + /*IsExpected=*/false); } } } else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) { @@ -1676,7 +1678,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { if (cast<CallBase>(I).isIndirectCall()) { I.setMetadata(LLVMContext::MD_prof, nullptr); } else { - setBranchWeights(I, {uint32_t(0)}, /*IsExpected=*/false); + setBranchWeights(I, ArrayRef<uint32_t>{uint32_t(0)}, + /*IsExpected=*/false); } } } diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 093a39eb4b5d..70b861482682 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -23,6 +23,8 @@ using namespace sampleprof; #define DEBUG_TYPE "sample-profile-matcher" +namespace llvm { + static cl::opt<unsigned> FuncProfileSimilarityThreshold( "func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " @@ -55,6 +57,8 @@ static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped.")); +} // end namespace llvm + void SampleProfileMatcher::findIRAnchors(const Function &F, AnchorMap &IRAnchors) const { // For inlined code, recover the original callsite and callee by finding the diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 09bffa7bf584..2d5cb8268ffd 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -120,6 +120,8 @@ STATISTIC(NumVirtConstProp1Bit, "Number of 1 bit virtual constant propagations"); STATISTIC(NumVirtConstProp, "Number of virtual constant propagations"); +namespace llvm { + static cl::opt<PassSummaryAction> ClSummaryAction( "wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), @@ -175,6 +177,8 @@ static cl::list<std::string> extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // end namespace llvm + /// With Clang, a pure virtual class's deleting destructor is emitted as a /// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the /// context of whole program devirtualization, the deleting destructor of a pure @@ -368,9 +372,7 @@ struct VTableSlot { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<VTableSlot> { +template <> struct llvm::DenseMapInfo<VTableSlot> { static VTableSlot getEmptyKey() { return {DenseMapInfo<Metadata *>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()}; @@ -389,7 +391,7 @@ template <> struct DenseMapInfo<VTableSlot> { } }; -template <> struct DenseMapInfo<VTableSlotSummary> { +template <> struct llvm::DenseMapInfo<VTableSlotSummary> { static VTableSlotSummary getEmptyKey() { return {DenseMapInfo<StringRef>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()}; @@ -408,8 +410,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> { } }; -} // end namespace llvm - // Returns true if the function must be unreachable based on ValueInfo. // // In particular, identifies a function as unreachable in the following |
