From 47459455009db4790ffc3765a2ec0f8b4934c2a4 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 11 Oct 2022 14:00:37 -0700 Subject: [MemProf] ThinLTO summary support Implements the ThinLTO summary support for memprof related metadata. This includes support for the assembly format, and for building the summary from IR during ModuleSummaryAnalysis. To reduce space in both the bitcode format and the in memory index, we do 2 things: 1. We keep a single vector of all uniq stack id hashes, and record the index into this vector in the callsite and allocation memprof summaries. 2. When building the combined index during the LTO link, the callsite and allocation memprof summaries are only kept on the FunctionSummary of the prevailing copy. Differential Revision: https://reviews.llvm.org/D135714 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 161 ++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 21 deletions(-) (limited to 'llvm/lib/Bitcode/Reader/BitcodeReader.cpp') diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 0a2415cbfdef..c4805602d1ad 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -883,8 +883,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { // they are recorded in the summary index being built. // We save a GUID which refers to the same global as the ValueInfo, but // ignoring the linkage, i.e. for values other than local linkage they are - // identical. - DenseMap> + // identical (this is the second tuple member). + // The third tuple member is the real GUID of the ValueInfo. + DenseMap> ValueIdToValueInfoMap; /// Map populated during module path string table parsing, from the @@ -904,10 +906,19 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// this module by the client. unsigned ModuleId; + /// Callback to ask whether a symbol is the prevailing copy when invoked + /// during combined index building. + std::function IsPrevailing; + + /// Saves the stack ids from the STACK_IDS record to consult when adding stack + /// ids from the lists in the callsite and alloc entries to the index. + std::vector StackIds; + public: - ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, - ModuleSummaryIndex &TheIndex, - StringRef ModulePath, unsigned ModuleId); + ModuleSummaryIndexBitcodeReader( + BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex, + StringRef ModulePath, unsigned ModuleId, + std::function IsPrevailing = nullptr); Error parseModule(); @@ -931,7 +942,8 @@ private: std::vector parseParamAccesses(ArrayRef Record); - std::tuple + template + std::tuple getValueInfoFromValueId(unsigned ValueId); void addThisModule(); @@ -6643,9 +6655,10 @@ std::vector BitcodeReader::getIdentifiedStructTypes() const { ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex, - StringRef ModulePath, unsigned ModuleId) + StringRef ModulePath, unsigned ModuleId, + std::function IsPrevailing) : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex), - ModulePath(ModulePath), ModuleId(ModuleId) {} + ModulePath(ModulePath), ModuleId(ModuleId), IsPrevailing(IsPrevailing) {} void ModuleSummaryIndexBitcodeReader::addThisModule() { TheIndex.addModule(ModulePath, ModuleId); @@ -6656,10 +6669,15 @@ ModuleSummaryIndexBitcodeReader::getThisModule() { return TheIndex.getModule(ModulePath); } -std::tuple +template +std::tuple ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) { auto VGI = ValueIdToValueInfoMap[ValueId]; - assert(std::get<0>(VGI)); + // We can have a null value info for memprof callsite info records in + // distributed ThinLTO index files when the callee function summary is not + // included in the index. The bitcode writer records 0 in that case, + // and the caller of this helper will set AllowNullValueInfo to true. + assert(AllowNullValueInfo || std::get<0>(VGI)); return VGI; } @@ -6682,7 +6700,7 @@ void ModuleSummaryIndexBitcodeReader::setValueGUID( ValueIdToValueInfoMap[ValueID] = std::make_tuple( TheIndex.getOrInsertValueInfo( ValueGUID, UseStrtab ? ValueName : TheIndex.saveString(ValueName)), - OriginalNameID); + OriginalNameID, ValueGUID); } // Specialized value symbol table parser used when reading module index @@ -6770,8 +6788,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( GlobalValue::GUID RefGUID = Record[1]; // The "original name", which is the second value of the pair will be // overriden later by a FS_COMBINED_ORIGINAL_NAME in the combined index. - ValueIdToValueInfoMap[ValueID] = - std::make_tuple(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); + ValueIdToValueInfoMap[ValueID] = std::make_tuple( + TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID); break; } } @@ -7116,6 +7134,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { PendingTypeCheckedLoadConstVCalls; std::vector PendingParamAccesses; + std::vector PendingCallsites; + std::vector PendingAllocs; + while (true) { Expected MaybeEntry = Stream.advanceSkippingSubblocks(); if (!MaybeEntry) @@ -7154,8 +7175,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { case bitc::FS_VALUE_GUID: { // [valueid, refguid] uint64_t ValueID = Record[0]; GlobalValue::GUID RefGUID = Record[1]; - ValueIdToValueInfoMap[ValueID] = - std::make_tuple(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); + ValueIdToValueInfoMap[ValueID] = std::make_tuple( + TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID); break; } // FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs, @@ -7207,6 +7228,13 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, HasRelBF); setSpecialRefs(Refs, NumRORefs, NumWORefs); + auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); + // In order to save memory, only record the memprof summaries if this is + // the prevailing copy of a symbol. + if (IsPrevailing && !IsPrevailing(std::get<2>(VIAndOriginalGUID))) { + PendingCallsites.clear(); + PendingAllocs.clear(); + } auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0, std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -7214,8 +7242,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls), - std::move(PendingParamAccesses)); - auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); + std::move(PendingParamAccesses), std::move(PendingCallsites), + std::move(PendingAllocs)); FS->setModulePath(getThisModule()->first()); FS->setOriginalName(std::get<1>(VIAndOriginalGUID)); TheIndex.addGlobalValueSummary(std::get<0>(VIAndOriginalGUID), @@ -7358,7 +7386,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls), - std::move(PendingParamAccesses)); + std::move(PendingParamAccesses), std::move(PendingCallsites), + std::move(PendingAllocs)); LastSeenSummary = FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); @@ -7484,6 +7513,95 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { PendingParamAccesses = parseParamAccesses(Record); break; } + + case bitc::FS_STACK_IDS: { // [n x stackid] + // Save stack ids in the reader to consult when adding stack ids from the + // lists in the stack node and alloc node entries. + StackIds = ArrayRef(Record); + break; + } + + case bitc::FS_PERMODULE_CALLSITE_INFO: { + unsigned ValueID = Record[0]; + SmallVector StackIdList; + for (auto R = Record.begin() + 1; R != Record.end(); R++) { + assert(*R < StackIds.size()); + StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[*R])); + } + ValueInfo VI = std::get<0>(getValueInfoFromValueId(ValueID)); + PendingCallsites.push_back(CallsiteInfo({VI, std::move(StackIdList)})); + break; + } + + case bitc::FS_COMBINED_CALLSITE_INFO: { + auto RecordIter = Record.begin(); + unsigned ValueID = *RecordIter++; + unsigned NumStackIds = *RecordIter++; + unsigned NumVersions = *RecordIter++; + assert(Record.size() == 3 + NumStackIds + NumVersions); + SmallVector StackIdList; + for (unsigned J = 0; J < NumStackIds; J++) { + assert(*RecordIter < StackIds.size()); + StackIdList.push_back( + TheIndex.addOrGetStackIdIndex(StackIds[*RecordIter++])); + } + SmallVector Versions; + for (unsigned J = 0; J < NumVersions; J++) + Versions.push_back(*RecordIter++); + ValueInfo VI = std::get<0>( + getValueInfoFromValueId(ValueID)); + PendingCallsites.push_back( + CallsiteInfo({VI, std::move(Versions), std::move(StackIdList)})); + break; + } + + case bitc::FS_PERMODULE_ALLOC_INFO: { + unsigned I = 0; + std::vector MIBs; + while (I < Record.size()) { + assert(Record.size() - I >= 2); + AllocationType AllocType = (AllocationType)Record[I++]; + unsigned NumStackEntries = Record[I++]; + assert(Record.size() - I >= NumStackEntries); + SmallVector StackIdList; + for (unsigned J = 0; J < NumStackEntries; J++) { + assert(Record[I] < StackIds.size()); + StackIdList.push_back( + TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]])); + } + MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); + } + PendingAllocs.push_back(AllocInfo(std::move(MIBs))); + break; + } + + case bitc::FS_COMBINED_ALLOC_INFO: { + unsigned I = 0; + std::vector MIBs; + unsigned NumMIBs = Record[I++]; + unsigned NumVersions = Record[I++]; + unsigned MIBsRead = 0; + while (MIBsRead++ < NumMIBs) { + assert(Record.size() - I >= 2); + AllocationType AllocType = (AllocationType)Record[I++]; + unsigned NumStackEntries = Record[I++]; + assert(Record.size() - I >= NumStackEntries); + SmallVector StackIdList; + for (unsigned J = 0; J < NumStackEntries; J++) { + assert(Record[I] < StackIds.size()); + StackIdList.push_back( + TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]])); + } + MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); + } + assert(Record.size() - I >= NumVersions); + SmallVector Versions; + for (unsigned J = 0; J < NumVersions; J++) + Versions.push_back(Record[I++]); + PendingAllocs.push_back( + AllocInfo(std::move(Versions), std::move(MIBs))); + break; + } } } llvm_unreachable("Exit infinite loop"); @@ -7803,14 +7921,15 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, // We don't use ModuleIdentifier here because the client may need to control the // module path used in the combined summary (e.g. when reading summaries for // regular LTO modules). -Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, - StringRef ModulePath, uint64_t ModuleId) { +Error BitcodeModule::readSummary( + ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId, + std::function IsPrevailing) { BitstreamCursor Stream(Buffer); if (Error JumpFailed = Stream.JumpToBit(ModuleBit)) return JumpFailed; ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, - ModulePath, ModuleId); + ModulePath, ModuleId, IsPrevailing); return R.parseModule(); } -- cgit v1.2.3