diff options
| author | Michael Kruse <llvm-project@meinersbur.de> | 2025-01-03 10:22:51 +0100 |
|---|---|---|
| committer | Michael Kruse <llvm-project@meinersbur.de> | 2025-01-03 10:22:51 +0100 |
| commit | 38500d63e14ce340236840f60d356cdefb56a52c (patch) | |
| tree | 17edbec446ce9b50d2f215a483b83afb293a635d /llvm/include | |
| parent | 1a3d5daaef7a6a63448a497da3eff7fc9e23df26 (diff) | |
| parent | 27f30029741ecf023baece7b3dde1ff9011ffefc (diff) | |
Merge branch 'main' into users/meinersbur/flang_runtime_split-headers [users/meinersbur/flang_runtime_split-headers]
Diffstat (limited to 'llvm/include')
181 files changed, 4890 insertions, 2116 deletions
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 99bf30877223..6b21e6c6d75a 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -222,7 +222,7 @@ using IdxPair = std::pair<unsigned,unsigned>; template <typename T1, typename T2, unsigned N> class NodeBase { public: - enum { Capacity = N }; + static constexpr unsigned Capacity = N; T1 first[N]; T2 second[N]; diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h new file mode 100644 index 000000000000..4049f892fa66 --- /dev/null +++ b/llvm/include/llvm/ADT/StringTable.h @@ -0,0 +1,91 @@ +//===- StringTable.h - Table of strings tracked by offset ----------C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRING_TABLE_H +#define LLVM_ADT_STRING_TABLE_H + +#include "llvm/ADT/StringRef.h" +#include <limits> + +namespace llvm { + +/// A table of densely packed, null-terminated strings indexed by offset. +/// +/// This table abstracts a densely concatenated list of null-terminated strings, +/// each of which can be referenced using an offset into the table. +/// +/// This requires and ensures that the string at offset 0 is also the empty +/// string. This helps allow zero-initialized offsets form empty strings and +/// avoids non-zero initialization when using a string literal pointer would +/// allow a null pointer. +/// +/// The primary use case is having a single global string literal for the table +/// contents, and offsets into it in other global data structures to avoid +/// dynamic relocations of individual string literal pointers in those global +/// data structures. 
+class StringTable { + StringRef Table; + +public: + // An offset into one of these packed string tables, used to select a string + // within the table. + // + // Typically these are created by TableGen or other code generator from + // computed offsets, and it just wraps that integer into a type until it is + // used with the relevant table. + // + // We also ensure that the empty string is at offset zero and default + // constructing this class gives you an offset of zero. This makes default + // constructing this type work similarly (after indexing the table) to default + // constructing a `StringRef`. + class Offset { + // Note that we ensure the empty string is at offset zero. + unsigned Value = 0; + + public: + constexpr Offset() = default; + constexpr Offset(unsigned Value) : Value(Value) {} + + constexpr unsigned value() const { return Value; } + }; + + // We directly handle string literals with a templated converting constructor + // because we *don't* want to do `strlen` on them -- we fully expect null + // bytes in this input. This is somewhat the opposite of how `StringLiteral` + // works. + template <size_t N> + constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) { + static_assert(N <= std::numeric_limits<unsigned>::max(), + "We only support table sizes that can be indexed by an " + "`unsigned` offset."); + + // Note that we can only use `empty`, `data`, and `size` in these asserts to + // support `constexpr`. + assert(!Table.empty() && "Requires at least a valid empty string."); + assert(Table.data()[0] == '\0' && "Offset zero must be the empty string."); + // Ensure that `strlen` from any offset cannot overflow the end of the table + // by insisting on a null byte at the end. + assert(Table.data()[Table.size() - 1] == '\0' && + "Last byte must be a null byte."); + } + + // Get a string from the table starting with the provided offset. 
The returned + // `StringRef` is in fact null terminated, and so can be converted safely to a + // C-string if necessary for a system API. + constexpr StringRef operator[](Offset O) const { + assert(O.value() < Table.size() && "Out of bounds offset!"); + return Table.data() + O.value(); + } + + /// Returns the byte size of the table. + constexpr size_t size() const { return Table.size(); } +}; + +} // namespace llvm + +#endif // LLVM_ADT_STRING_TABLE_H diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 60875577561d..acc580f92b40 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -875,6 +875,13 @@ bool isIdentifiedObject(const Value *V); /// IdentifiedObjects. bool isIdentifiedFunctionLocal(const Value *V); +/// Return true if we know V to be the base address of the corresponding memory +/// object. This implies that any address less than V must be out of bounds +/// for the underlying object. Note that just being isIdentifiedObject() is +/// not enough - For example, a negative offset from a noalias argument or call +/// can be inbounds w.r.t the actual underlying object. +bool isBaseOfObject(const Value *V); + /// Returns true if the pointer is one which would have been considered an /// escape by isNonEscapingLocalObject. 
bool isEscapeSource(const Value *V); diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index 6b577c02f054..87c5615c28ee 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -11,6 +11,8 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Alignment.h" @@ -18,33 +20,187 @@ namespace llvm { class CallInst; +class DataLayout; class LLVMContext; class MDTuple; class Value; +class DXILResourceTypeMap; + namespace dxil { -class ResourceInfo { +/// The dx.RawBuffer target extension type +/// +/// `target("dx.RawBuffer", Type, IsWriteable, IsROV)` +class RawBufferExtType : public TargetExtType { public: - struct ResourceBinding { - uint32_t RecordID; - uint32_t Space; - uint32_t LowerBound; - uint32_t Size; + RawBufferExtType() = delete; + RawBufferExtType(const RawBufferExtType &) = delete; + RawBufferExtType &operator=(const RawBufferExtType &) = delete; + + bool isStructured() const { + // TODO: We need to be more prescriptive here, but since there's some debate + // over whether byte address buffer should have a void type or an i8 type, + // accept either for now. + Type *Ty = getTypeParameter(0); + return !Ty->isVoidTy() && !Ty->isIntegerTy(8); + } - bool operator==(const ResourceBinding &RHS) const { - return std::tie(RecordID, Space, LowerBound, Size) == - std::tie(RHS.RecordID, RHS.Space, RHS.LowerBound, RHS.Size); - } - bool operator!=(const ResourceBinding &RHS) const { - return !(*this == RHS); - } - bool operator<(const ResourceBinding &RHS) const { - return std::tie(RecordID, Space, LowerBound, Size) < - std::tie(RHS.RecordID, RHS.Space, RHS.LowerBound, RHS.Size); - } - }; + Type *getResourceType() const { + return isStructured() ? 
getTypeParameter(0) : nullptr; + } + bool isWriteable() const { return getIntParameter(0); } + bool isROV() const { return getIntParameter(1); } + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.RawBuffer"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +/// The dx.TypedBuffer target extension type +/// +/// `target("dx.TypedBuffer", Type, IsWriteable, IsROV, IsSigned)` +class TypedBufferExtType : public TargetExtType { +public: + TypedBufferExtType() = delete; + TypedBufferExtType(const TypedBufferExtType &) = delete; + TypedBufferExtType &operator=(const TypedBufferExtType &) = delete; + + Type *getResourceType() const { return getTypeParameter(0); } + bool isWriteable() const { return getIntParameter(0); } + bool isROV() const { return getIntParameter(1); } + bool isSigned() const { return getIntParameter(2); } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.TypedBuffer"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +/// The dx.Texture target extension type +/// +/// `target("dx.Texture", Type, IsWriteable, IsROV, IsSigned, Dimension)` +class TextureExtType : public TargetExtType { +public: + TextureExtType() = delete; + TextureExtType(const TextureExtType &) = delete; + TextureExtType &operator=(const TextureExtType &) = delete; + + Type *getResourceType() const { return getTypeParameter(0); } + bool isWriteable() const { return getIntParameter(0); } + bool isROV() const { return getIntParameter(1); } + bool isSigned() const { return getIntParameter(2); } + dxil::ResourceKind getDimension() const { + return static_cast<dxil::ResourceKind>(getIntParameter(3)); + } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.Texture"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); 
+ } +}; + +/// The dx.MSTexture target extension type +/// +/// `target("dx.MSTexture", Type, IsWriteable, Samples, IsSigned, Dimension)` +class MSTextureExtType : public TargetExtType { +public: + MSTextureExtType() = delete; + MSTextureExtType(const MSTextureExtType &) = delete; + MSTextureExtType &operator=(const MSTextureExtType &) = delete; + + Type *getResourceType() const { return getTypeParameter(0); } + bool isWriteable() const { return getIntParameter(0); } + uint32_t getSampleCount() const { return getIntParameter(1); } + bool isSigned() const { return getIntParameter(2); } + dxil::ResourceKind getDimension() const { + return static_cast<dxil::ResourceKind>(getIntParameter(3)); + } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.MSTexture"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +/// The dx.FeedbackTexture target extension type +/// +/// `target("dx.FeedbackTexture", FeedbackType, Dimension)` +class FeedbackTextureExtType : public TargetExtType { +public: + FeedbackTextureExtType() = delete; + FeedbackTextureExtType(const FeedbackTextureExtType &) = delete; + FeedbackTextureExtType &operator=(const FeedbackTextureExtType &) = delete; + + dxil::SamplerFeedbackType getFeedbackType() const { + return static_cast<dxil::SamplerFeedbackType>(getIntParameter(0)); + } + dxil::ResourceKind getDimension() const { + return static_cast<dxil::ResourceKind>(getIntParameter(1)); + } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.FeedbackTexture"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +/// The dx.CBuffer target extension type +/// +/// `target("dx.CBuffer", <Type>, ...)` +class CBufferExtType : public TargetExtType { +public: + CBufferExtType() = delete; + CBufferExtType(const CBufferExtType &) = delete; + CBufferExtType &operator=(const 
CBufferExtType &) = delete; + + Type *getResourceType() const { return getTypeParameter(0); } + uint32_t getCBufferSize() const { return getIntParameter(0); } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.CBuffer"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +/// The dx.Sampler target extension type +/// +/// `target("dx.Sampler", SamplerType)` +class SamplerExtType : public TargetExtType { +public: + SamplerExtType() = delete; + SamplerExtType(const SamplerExtType &) = delete; + SamplerExtType &operator=(const SamplerExtType &) = delete; + + dxil::SamplerType getSamplerType() const { + return static_cast<dxil::SamplerType>(getIntParameter(0)); + } + + static bool classof(const TargetExtType *T) { + return T->getName() == "dx.Sampler"; + } + static bool classof(const Type *T) { + return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T)); + } +}; + +//===----------------------------------------------------------------------===// + +class ResourceTypeInfo { +public: struct UAVInfo { bool GloballyCoherent; bool HasCounter; @@ -93,55 +249,31 @@ public: } }; - struct MSInfo { - uint32_t Count; - - bool operator==(const MSInfo &RHS) const { return Count == RHS.Count; } - bool operator!=(const MSInfo &RHS) const { return !(*this == RHS); } - bool operator<(const MSInfo &RHS) const { return Count < RHS.Count; } - }; - - struct FeedbackInfo { - dxil::SamplerFeedbackType Type; - - bool operator==(const FeedbackInfo &RHS) const { return Type == RHS.Type; } - bool operator!=(const FeedbackInfo &RHS) const { return !(*this == RHS); } - bool operator<(const FeedbackInfo &RHS) const { return Type < RHS.Type; } - }; - private: - // Universal properties. 
- Value *Symbol; - StringRef Name; + TargetExtType *HandleTy; + + // GloballyCoherent and HasCounter aren't really part of the type and need to + // be determined by analysis, so they're just provided directly by the + // DXILResourceTypeMap when we construct these. + bool GloballyCoherent; + bool HasCounter; dxil::ResourceClass RC; dxil::ResourceKind Kind; - ResourceBinding Binding = {}; - - // Resource class dependent properties. - // CBuffer, Sampler, and RawBuffer end here. - union { - UAVInfo UAVFlags; // UAV - uint32_t CBufferSize; // CBuffer - dxil::SamplerType SamplerTy; // Sampler - }; - - // Resource kind dependent properties. - union { - StructInfo Struct; // StructuredBuffer - TypedInfo Typed; // All SRV/UAV except Raw/StructuredBuffer - FeedbackInfo Feedback; // FeedbackTexture - }; - - MSInfo MultiSample; - public: - ResourceInfo(dxil::ResourceClass RC, dxil::ResourceKind Kind, Value *Symbol, - StringRef Name) - : Symbol(Symbol), Name(Name), RC(RC), Kind(Kind) {} - - // Conditions to check before accessing union members. + ResourceTypeInfo(TargetExtType *HandleTy, const dxil::ResourceClass RC, + const dxil::ResourceKind Kind, bool GloballyCoherent = false, + bool HasCounter = false); + ResourceTypeInfo(TargetExtType *HandleTy, bool GloballyCoherent = false, + bool HasCounter = false) + : ResourceTypeInfo(HandleTy, {}, dxil::ResourceKind::Invalid, + GloballyCoherent, HasCounter) {} + + TargetExtType *getHandleTy() const { return HandleTy; } + StructType *createElementStruct(); + + // Conditions to check before accessing specific views. bool isUAV() const; bool isCBuffer() const; bool isSampler() const; @@ -150,148 +282,173 @@ public: bool isFeedback() const; bool isMultiSample() const; - void bind(uint32_t RecordID, uint32_t Space, uint32_t LowerBound, - uint32_t Size) { - Binding.RecordID = RecordID; - Binding.Space = Space; - Binding.LowerBound = LowerBound; - Binding.Size = Size; - } + // Views into the type. 
+ UAVInfo getUAV() const; + uint32_t getCBufferSize(const DataLayout &DL) const; + dxil::SamplerType getSamplerType() const; + StructInfo getStruct(const DataLayout &DL) const; + TypedInfo getTyped() const; + dxil::SamplerFeedbackType getFeedbackType() const; + uint32_t getMultiSampleCount() const; + + dxil::ResourceClass getResourceClass() const { return RC; } + dxil::ResourceKind getResourceKind() const { return Kind; } + + void setGloballyCoherent(bool V) { GloballyCoherent = V; } + void setHasCounter(bool V) { HasCounter = V; } + + bool operator==(const ResourceTypeInfo &RHS) const; + bool operator!=(const ResourceTypeInfo &RHS) const { return !(*this == RHS); } + bool operator<(const ResourceTypeInfo &RHS) const; + + void print(raw_ostream &OS, const DataLayout &DL) const; +}; + +//===----------------------------------------------------------------------===// + +class ResourceBindingInfo { +public: + struct ResourceBinding { + uint32_t RecordID; + uint32_t Space; + uint32_t LowerBound; + uint32_t Size; + + bool operator==(const ResourceBinding &RHS) const { + return std::tie(RecordID, Space, LowerBound, Size) == + std::tie(RHS.RecordID, RHS.Space, RHS.LowerBound, RHS.Size); + } + bool operator!=(const ResourceBinding &RHS) const { + return !(*this == RHS); + } + bool operator<(const ResourceBinding &RHS) const { + return std::tie(RecordID, Space, LowerBound, Size) < + std::tie(RHS.RecordID, RHS.Space, RHS.LowerBound, RHS.Size); + } + }; + +private: + ResourceBinding Binding; + TargetExtType *HandleTy; + GlobalVariable *Symbol = nullptr; + +public: + ResourceBindingInfo(uint32_t RecordID, uint32_t Space, uint32_t LowerBound, + uint32_t Size, TargetExtType *HandleTy, + GlobalVariable *Symbol = nullptr) + : Binding{RecordID, Space, LowerBound, Size}, HandleTy(HandleTy), + Symbol(Symbol) {} + + void setBindingID(unsigned ID) { Binding.RecordID = ID; } + const ResourceBinding &getBinding() const { return Binding; } - void setUAV(bool GloballyCoherent, bool 
HasCounter, bool IsROV) { - assert(isUAV() && "Not a UAV"); - UAVFlags.GloballyCoherent = GloballyCoherent; - UAVFlags.HasCounter = HasCounter; - UAVFlags.IsROV = IsROV; - } - const UAVInfo &getUAV() const { - assert(isUAV() && "Not a UAV"); - return UAVFlags; - } - void setCBuffer(uint32_t Size) { - assert(isCBuffer() && "Not a CBuffer"); - CBufferSize = Size; - } - void setSampler(dxil::SamplerType Ty) { SamplerTy = Ty; } - void setStruct(uint32_t Stride, MaybeAlign Alignment) { - assert(isStruct() && "Not a Struct"); - Struct.Stride = Stride; - Struct.AlignLog2 = Alignment ? Log2(*Alignment) : 0; - } - void setTyped(dxil::ElementType ElementTy, uint32_t ElementCount) { - assert(isTyped() && "Not Typed"); - Typed.ElementTy = ElementTy; - Typed.ElementCount = ElementCount; + TargetExtType *getHandleTy() const { return HandleTy; } + const StringRef getName() const { return Symbol ? Symbol->getName() : ""; } + + bool hasSymbol() const { return Symbol; } + GlobalVariable *createSymbol(Module &M, StructType *Ty, StringRef Name = ""); + MDTuple *getAsMetadata(Module &M, dxil::ResourceTypeInfo &RTI) const; + + std::pair<uint32_t, uint32_t> + getAnnotateProps(Module &M, dxil::ResourceTypeInfo &RTI) const; + + bool operator==(const ResourceBindingInfo &RHS) const { + return std::tie(Binding, HandleTy, Symbol) == + std::tie(RHS.Binding, RHS.HandleTy, RHS.Symbol); } - const TypedInfo &getTyped() const { - assert(isTyped() && "Not typed"); - return Typed; + bool operator!=(const ResourceBindingInfo &RHS) const { + return !(*this == RHS); } - void setFeedback(dxil::SamplerFeedbackType Type) { - assert(isFeedback() && "Not Feedback"); - Feedback.Type = Type; + bool operator<(const ResourceBindingInfo &RHS) const { + return Binding < RHS.Binding; } - void setMultiSample(uint32_t Count) { - assert(isMultiSample() && "Not MultiSampled"); - MultiSample.Count = Count; + + void print(raw_ostream &OS, dxil::ResourceTypeInfo &RTI, + const DataLayout &DL) const; +}; + +} // namespace 
dxil + +//===----------------------------------------------------------------------===// + +class DXILResourceTypeMap { + DenseMap<TargetExtType *, dxil::ResourceTypeInfo> Infos; + +public: + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv); + + dxil::ResourceTypeInfo &operator[](TargetExtType *Ty) { + auto It = Infos.find(Ty); + if (It != Infos.end()) + return It->second; + auto [NewIt, Inserted] = Infos.try_emplace(Ty, Ty); + return NewIt->second; } - const MSInfo &getMultiSample() const { - assert(isMultiSample() && "Not MultiSampled"); - return MultiSample; +}; + +class DXILResourceTypeAnalysis + : public AnalysisInfoMixin<DXILResourceTypeAnalysis> { + friend AnalysisInfoMixin<DXILResourceTypeAnalysis>; + + static AnalysisKey Key; + +public: + using Result = DXILResourceTypeMap; + + DXILResourceTypeMap run(Module &M, ModuleAnalysisManager &AM) { + // Running the pass just generates an empty map, which will be filled when + // users of the pass query the results. 
+ return Result(); } +}; - StringRef getName() const { return Name; } - dxil::ResourceClass getResourceClass() const { return RC; } - dxil::ResourceKind getResourceKind() const { return Kind; } +class DXILResourceTypeWrapperPass : public ImmutablePass { + DXILResourceTypeMap DRTM; - bool operator==(const ResourceInfo &RHS) const; - bool operator!=(const ResourceInfo &RHS) const { return !(*this == RHS); } - bool operator<(const ResourceInfo &RHS) const; - - static ResourceInfo SRV(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, uint32_t ElementCount, - dxil::ResourceKind Kind); - static ResourceInfo RawBuffer(Value *Symbol, StringRef Name); - static ResourceInfo StructuredBuffer(Value *Symbol, StringRef Name, - uint32_t Stride, MaybeAlign Alignment); - static ResourceInfo Texture2DMS(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, - uint32_t ElementCount, uint32_t SampleCount); - static ResourceInfo Texture2DMSArray(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, - uint32_t ElementCount, - uint32_t SampleCount); - - static ResourceInfo UAV(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, uint32_t ElementCount, - bool GloballyCoherent, bool IsROV, - dxil::ResourceKind Kind); - static ResourceInfo RWRawBuffer(Value *Symbol, StringRef Name, - bool GloballyCoherent, bool IsROV); - static ResourceInfo RWStructuredBuffer(Value *Symbol, StringRef Name, - uint32_t Stride, MaybeAlign Alignment, - bool GloballyCoherent, bool IsROV, - bool HasCounter); - static ResourceInfo RWTexture2DMS(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, - uint32_t ElementCount, uint32_t SampleCount, - bool GloballyCoherent); - static ResourceInfo RWTexture2DMSArray(Value *Symbol, StringRef Name, - dxil::ElementType ElementTy, - uint32_t ElementCount, - uint32_t SampleCount, - bool GloballyCoherent); - static ResourceInfo FeedbackTexture2D(Value *Symbol, StringRef Name, - dxil::SamplerFeedbackType FeedbackTy); - static 
ResourceInfo - FeedbackTexture2DArray(Value *Symbol, StringRef Name, - dxil::SamplerFeedbackType FeedbackTy); - - static ResourceInfo CBuffer(Value *Symbol, StringRef Name, uint32_t Size); - - static ResourceInfo Sampler(Value *Symbol, StringRef Name, - dxil::SamplerType SamplerTy); - - MDTuple *getAsMetadata(LLVMContext &Ctx) const; - - std::pair<uint32_t, uint32_t> getAnnotateProps() const; - - void print(raw_ostream &OS) const; + virtual void anchor(); + +public: + static char ID; + DXILResourceTypeWrapperPass(); + + DXILResourceTypeMap &getResourceTypeMap() { return DRTM; } + const DXILResourceTypeMap &getResourceTypeMap() const { return DRTM; } }; -} // namespace dxil +ModulePass *createDXILResourceTypeWrapperPassPass(); -class DXILResourceMap { - SmallVector<dxil::ResourceInfo> Resources; +//===----------------------------------------------------------------------===// + +class DXILBindingMap { + SmallVector<dxil::ResourceBindingInfo> Infos; DenseMap<CallInst *, unsigned> CallMap; unsigned FirstUAV = 0; unsigned FirstCBuffer = 0; unsigned FirstSampler = 0; -public: - using iterator = SmallVector<dxil::ResourceInfo>::iterator; - using const_iterator = SmallVector<dxil::ResourceInfo>::const_iterator; + /// Populate the map given the resource binding calls in the given module. 
+ void populate(Module &M, DXILResourceTypeMap &DRTM); - DXILResourceMap( - SmallVectorImpl<std::pair<CallInst *, dxil::ResourceInfo>> &&CIToRI); +public: + using iterator = SmallVector<dxil::ResourceBindingInfo>::iterator; + using const_iterator = SmallVector<dxil::ResourceBindingInfo>::const_iterator; - iterator begin() { return Resources.begin(); } - const_iterator begin() const { return Resources.begin(); } - iterator end() { return Resources.end(); } - const_iterator end() const { return Resources.end(); } + iterator begin() { return Infos.begin(); } + const_iterator begin() const { return Infos.begin(); } + iterator end() { return Infos.end(); } + const_iterator end() const { return Infos.end(); } - bool empty() const { return Resources.empty(); } + bool empty() const { return Infos.empty(); } iterator find(const CallInst *Key) { auto Pos = CallMap.find(Key); - return Pos == CallMap.end() ? Resources.end() - : (Resources.begin() + Pos->second); + return Pos == CallMap.end() ? Infos.end() : (Infos.begin() + Pos->second); } const_iterator find(const CallInst *Key) const { auto Pos = CallMap.find(Key); - return Pos == CallMap.end() ? Resources.end() - : (Resources.begin() + Pos->second); + return Pos == CallMap.end() ? 
Infos.end() : (Infos.begin() + Pos->second); } iterator srv_begin() { return begin(); } @@ -334,44 +491,51 @@ public: return make_range(sampler_begin(), sampler_end()); } - void print(raw_ostream &OS) const; + void print(raw_ostream &OS, DXILResourceTypeMap &DRTM, + const DataLayout &DL) const; + + friend class DXILResourceBindingAnalysis; + friend class DXILResourceBindingWrapperPass; }; -class DXILResourceAnalysis : public AnalysisInfoMixin<DXILResourceAnalysis> { - friend AnalysisInfoMixin<DXILResourceAnalysis>; +class DXILResourceBindingAnalysis + : public AnalysisInfoMixin<DXILResourceBindingAnalysis> { + friend AnalysisInfoMixin<DXILResourceBindingAnalysis>; static AnalysisKey Key; public: - using Result = DXILResourceMap; + using Result = DXILBindingMap; /// Gather resource info for the module \c M. - DXILResourceMap run(Module &M, ModuleAnalysisManager &AM); + DXILBindingMap run(Module &M, ModuleAnalysisManager &AM); }; -/// Printer pass for the \c DXILResourceAnalysis results. -class DXILResourcePrinterPass : public PassInfoMixin<DXILResourcePrinterPass> { +/// Printer pass for the \c DXILResourceBindingAnalysis results. 
+class DXILResourceBindingPrinterPass + : public PassInfoMixin<DXILResourceBindingPrinterPass> { raw_ostream &OS; public: - explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {} + explicit DXILResourceBindingPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } }; -class DXILResourceWrapperPass : public ModulePass { - std::unique_ptr<DXILResourceMap> ResourceMap; +class DXILResourceBindingWrapperPass : public ModulePass { + std::unique_ptr<DXILBindingMap> Map; + DXILResourceTypeMap *DRTM; public: static char ID; // Class identification, replacement for typeinfo - DXILResourceWrapperPass(); - ~DXILResourceWrapperPass() override; + DXILResourceBindingWrapperPass(); + ~DXILResourceBindingWrapperPass() override; - const DXILResourceMap &getResourceMap() const { return *ResourceMap; } - DXILResourceMap &getResourceMap() { return *ResourceMap; } + const DXILBindingMap &getBindingMap() const { return *Map; } + DXILBindingMap &getBindingMap() { return *Map; } void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnModule(Module &M) override; @@ -381,7 +545,7 @@ public: void dump() const; }; -ModulePass *createDXILResourceWrapperPassPass(); +ModulePass *createDXILResourceBindingWrapperPassPass(); } // namespace llvm diff --git a/llvm/include/llvm/Analysis/DomTreeUpdater.h b/llvm/include/llvm/Analysis/DomTreeUpdater.h index c120a6cc6ce5..0386262ba2b6 100644 --- a/llvm/include/llvm/Analysis/DomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/DomTreeUpdater.h @@ -81,6 +81,9 @@ public: ///@} + /// Debug method to help view the internal state of this class. + LLVM_DUMP_METHOD void dump() const; + private: class CallBackOnDeletion final : public CallbackVH { public: @@ -109,9 +112,6 @@ private: /// Returns true if at least one BasicBlock is deleted. bool forceFlushDeletedBB(); - - /// Debug method to help view the internal state of this class. 
- LLVM_DUMP_METHOD void dump() const; }; extern template class GenericDomTreeUpdater<DomTreeUpdater, DominatorTree, @@ -120,6 +120,13 @@ extern template class GenericDomTreeUpdater<DomTreeUpdater, DominatorTree, extern template void GenericDomTreeUpdater<DomTreeUpdater, DominatorTree, PostDominatorTree>::recalculate(Function &F); + +extern template void +GenericDomTreeUpdater<DomTreeUpdater, DominatorTree, PostDominatorTree>:: + applyUpdatesImpl</*IsForward=*/true>(); +extern template void +GenericDomTreeUpdater<DomTreeUpdater, DominatorTree, PostDominatorTree>:: + applyUpdatesImpl</*IsForward=*/false>(); } // namespace llvm #endif // LLVM_ANALYSIS_DOMTREEUPDATER_H diff --git a/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h b/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h index ca4ce68b85cb..4a03f548823e 100644 --- a/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/GenericDomTreeUpdater.h @@ -30,6 +30,7 @@ class GenericDomTreeUpdater { public: enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 }; using BasicBlockT = typename DomTreeT::NodeType; + using UpdateT = typename DomTreeT::UpdateType; explicit GenericDomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {} @@ -146,7 +147,12 @@ public: /// 2. It is illegal to submit any update that has already been submitted, /// i.e., you are supposed not to insert an existent edge or delete a /// nonexistent edge. - void applyUpdates(ArrayRef<typename DomTreeT::UpdateType> Updates); + void applyUpdates(ArrayRef<UpdateT> Updates); + + /// Apply updates that the critical edge (FromBB, ToBB) has been + /// split with NewBB. + void splitCriticalEdge(BasicBlockT *FromBB, BasicBlockT *ToBB, + BasicBlockT *NewBB); /// Submit updates to all available trees. It will also /// 1. discard duplicated updates, @@ -169,7 +175,7 @@ public: /// 3. It is only legal to submit updates to an edge in the order CFG changes /// are made. 
The order you submit updates on different edges is not /// restricted. - void applyUpdatesPermissive(ArrayRef<typename DomTreeT::UpdateType> Updates); + void applyUpdatesPermissive(ArrayRef<UpdateT> Updates); ///@} @@ -205,7 +211,25 @@ public: LLVM_DUMP_METHOD void dump() const; protected: - SmallVector<typename DomTreeT::UpdateType, 16> PendUpdates; + /// Helper structure used to hold all the basic blocks + /// involved in the split of a critical edge. + struct CriticalEdge { + BasicBlockT *FromBB; + BasicBlockT *ToBB; + BasicBlockT *NewBB; + }; + + struct DomTreeUpdate { + bool IsCriticalEdgeSplit = false; + union { + UpdateT Update; + CriticalEdge EdgeSplit; + }; + DomTreeUpdate(UpdateT Update) : Update(Update) {} + DomTreeUpdate(CriticalEdge E) : IsCriticalEdgeSplit(true), EdgeSplit(E) {} + }; + + SmallVector<DomTreeUpdate, 16> PendUpdates; size_t PendDTUpdateIndex = 0; size_t PendPDTUpdateIndex = 0; DomTreeT *DT = nullptr; @@ -216,21 +240,21 @@ protected: bool IsRecalculatingPostDomTree = false; /// Returns true if the update is self dominance. - bool isSelfDominance(typename DomTreeT::UpdateType Update) const { + bool isSelfDominance(UpdateT Update) const { // Won't affect DomTree and PostDomTree. return Update.getFrom() == Update.getTo(); } /// Helper function to apply all pending DomTree updates. - void applyDomTreeUpdates(); + void applyDomTreeUpdates() { applyUpdatesImpl<true>(); } /// Helper function to apply all pending PostDomTree updates. - void applyPostDomTreeUpdates(); + void applyPostDomTreeUpdates() { applyUpdatesImpl<false>(); } /// Returns true if the update appears in the LLVM IR. /// It is used to check whether an update is valid in /// insertEdge/deleteEdge or is unnecessary in the batch update. - bool isUpdateValid(typename DomTreeT::UpdateType Update) const; + bool isUpdateValid(UpdateT Update) const; /// Erase Basic Block node before it is unlinked from Function /// in the DomTree and PostDomTree. 
@@ -243,6 +267,11 @@ protected: /// Drop all updates applied by all available trees and delete BasicBlocks if /// all available trees are up-to-date. void dropOutOfDateUpdates(); + +private: + void splitDTCriticalEdges(ArrayRef<CriticalEdge> Updates); + void splitPDTCriticalEdges(ArrayRef<CriticalEdge> Updates); + template <bool IsForward> void applyUpdatesImpl(); }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/GenericDomTreeUpdaterImpl.h b/llvm/include/llvm/Analysis/GenericDomTreeUpdaterImpl.h index b79eaef57710..896b68c5021b 100644 --- a/llvm/include/llvm/Analysis/GenericDomTreeUpdaterImpl.h +++ b/llvm/include/llvm/Analysis/GenericDomTreeUpdaterImpl.h @@ -16,6 +16,7 @@ #ifndef LLVM_ANALYSIS_GENERICDOMTREEUPDATERIMPL_H #define LLVM_ANALYSIS_GENERICDOMTREEUPDATERIMPL_H +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/GenericDomTreeUpdater.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -56,7 +57,7 @@ void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::recalculate( template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::applyUpdates( - ArrayRef<typename DomTreeT::UpdateType> Updates) { + ArrayRef<UpdateT> Updates) { if (!DT && !PDT) return; @@ -77,12 +78,12 @@ void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::applyUpdates( template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>:: - applyUpdatesPermissive(ArrayRef<typename DomTreeT::UpdateType> Updates) { + applyUpdatesPermissive(ArrayRef<UpdateT> Updates) { if (!DT && !PDT) return; SmallSet<std::pair<BasicBlockT *, BasicBlockT *>, 8> Seen; - SmallVector<typename DomTreeT::UpdateType, 8> DeduplicatedUpdates; + SmallVector<UpdateT, 8> DeduplicatedUpdates; for (const auto &U : Updates) { auto Edge = std::make_pair(U.getFrom(), U.getTo()); // Because it is illegal to submit updates that have 
already been applied @@ -130,6 +131,24 @@ void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>:: } template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> +void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::splitCriticalEdge( + BasicBlockT *FromBB, BasicBlockT *ToBB, BasicBlockT *NewBB) { + if (!DT && !PDT) + return; + + CriticalEdge E = {FromBB, ToBB, NewBB}; + if (Strategy == UpdateStrategy::Lazy) { + PendUpdates.push_back(E); + return; + } + + if (DT) + splitDTCriticalEdges(E); + if (PDT) + splitPDTCriticalEdges(E); +} + +template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> DomTreeT & GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::getDomTree() { assert(DT && "Invalid acquisition of a null DomTree"); @@ -171,39 +190,40 @@ GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::dump() const { OS << "Lazy\n"; int Index = 0; + auto printBlockInfo = [&](BasicBlockT *BB, StringRef Ending) { + if (BB) { + auto S = BB->getName(); + if (!BB->hasName()) + S = "(no name)"; + OS << S << "(" << BB << ")" << Ending; + } else { + OS << "(badref)" << Ending; + } + }; + auto printUpdates = - [&](typename ArrayRef<typename DomTreeT::UpdateType>::const_iterator - begin, - typename ArrayRef<typename DomTreeT::UpdateType>::const_iterator - end) { + [&](typename ArrayRef<DomTreeUpdate>::const_iterator begin, + typename ArrayRef<DomTreeUpdate>::const_iterator end) { if (begin == end) OS << " None\n"; Index = 0; for (auto It = begin, ItEnd = end; It != ItEnd; ++It) { - auto U = *It; - OS << " " << Index << " : "; - ++Index; - if (U.getKind() == DomTreeT::Insert) - OS << "Insert, "; - else - OS << "Delete, "; - BasicBlockT *From = U.getFrom(); - if (From) { - auto S = From->getName(); - if (!From->hasName()) - S = "(no name)"; - OS << S << "(" << From << "), "; - } else { - OS << "(badref), "; - } - BasicBlockT *To = U.getTo(); - if (To) { - auto S = To->getName(); - if (!To->hasName()) - S = "(no_name)"; - OS << S << "(" << 
To << ")\n"; + if (!It->IsCriticalEdgeSplit) { + auto U = It->Update; + OS << " " << Index << " : "; + ++Index; + if (U.getKind() == DomTreeT::Insert) + OS << "Insert, "; + else + OS << "Delete, "; + printBlockInfo(U.getFrom(), ", "); + printBlockInfo(U.getTo(), "\n"); } else { - OS << "(badref)\n"; + const auto &Edge = It->EdgeSplit; + OS << " " << Index++ << " : Split critical edge, "; + printBlockInfo(Edge.FromBB, ", "); + printBlockInfo(Edge.ToBB, ", "); + printBlockInfo(Edge.NewBB, "\n"); } } }; @@ -236,50 +256,53 @@ GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::dump() const { if (BB->hasName()) OS << BB->getName() << "("; else - OS << "(no_name)("; + OS << "(no name)("; OS << BB << ")\n"; } #endif } template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> +template <bool IsForward> void GenericDomTreeUpdater<DerivedT, DomTreeT, - PostDomTreeT>::applyDomTreeUpdates() { + PostDomTreeT>::applyUpdatesImpl() { + auto *DomTree = [&]() { + if constexpr (IsForward) + return DT; + else + return PDT; + }(); // No pending DomTreeUpdates. - if (Strategy != UpdateStrategy::Lazy || !DT) + if (Strategy != UpdateStrategy::Lazy || !DomTree) return; + size_t &PendUpdateIndex = IsForward ? PendDTUpdateIndex : PendPDTUpdateIndex; - // Only apply updates not are applied by DomTree. - if (hasPendingDomTreeUpdates()) { - const auto I = PendUpdates.begin() + PendDTUpdateIndex; + // Only apply updates not are applied by (Post)DomTree. + while (IsForward ? 
hasPendingDomTreeUpdates() + : hasPendingPostDomTreeUpdates()) { + auto I = PendUpdates.begin() + PendUpdateIndex; const auto E = PendUpdates.end(); assert(I < E && "Iterator range invalid; there should be DomTree updates."); - DT->applyUpdates(ArrayRef<typename DomTreeT::UpdateType>(I, E)); - PendDTUpdateIndex = PendUpdates.size(); - } -} - -template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> -void GenericDomTreeUpdater<DerivedT, DomTreeT, - PostDomTreeT>::applyPostDomTreeUpdates() { - // No pending PostDomTreeUpdates. - if (Strategy != UpdateStrategy::Lazy || !PDT) - return; - - // Only apply updates not are applied by PostDomTree. - if (hasPendingPostDomTreeUpdates()) { - const auto I = PendUpdates.begin() + PendPDTUpdateIndex; - const auto E = PendUpdates.end(); - assert(I < E && - "Iterator range invalid; there should be PostDomTree updates."); - PDT->applyUpdates(ArrayRef<typename DomTreeT::UpdateType>(I, E)); - PendPDTUpdateIndex = PendUpdates.size(); + if (!I->IsCriticalEdgeSplit) { + SmallVector<UpdateT, 32> NormalUpdates; + for (; I != E && !I->IsCriticalEdgeSplit; ++I) + NormalUpdates.push_back(I->Update); + DomTree->applyUpdates(NormalUpdates); + PendUpdateIndex += NormalUpdates.size(); + } else { + SmallVector<CriticalEdge> CriticalEdges; + for (; I != E && I->IsCriticalEdgeSplit; ++I) + CriticalEdges.push_back(I->EdgeSplit); + IsForward ? 
splitDTCriticalEdges(CriticalEdges) + : splitPDTCriticalEdges(CriticalEdges); + PendUpdateIndex += CriticalEdges.size(); + } } } template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> bool GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>::isUpdateValid( - typename DomTreeT::UpdateType Update) const { + UpdateT Update) const { const auto *From = Update.getFrom(); const auto *To = Update.getTo(); const auto Kind = Update.getKind(); @@ -347,6 +370,96 @@ void GenericDomTreeUpdater<DerivedT, DomTreeT, PendPDTUpdateIndex -= dropIndex; } +template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> +void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>:: + splitDTCriticalEdges(ArrayRef<CriticalEdge> Edges) { + // Bail out early if there is nothing to do. + if (!DT || Edges.empty()) + return; + + // Remember all the basic blocks that are inserted during + // edge splitting. + // Invariant: NewBBs == all the basic blocks contained in the NewBB + // field of all the elements of Edges. + // I.e., forall elt in Edges, there exists BB in NewBBs + // such that BB == elt.NewBB. + SmallSet<BasicBlockT *, 32> NewBBs; + for (auto &Edge : Edges) + NewBBs.insert(Edge.NewBB); + // For each element in Edges, remember whether or not element + // is the new immediate dominator of its successor. The mapping is done by + // index, i.e., the information for the ith element of Edges is + // the ith element of IsNewIDom. + SmallBitVector IsNewIDom(Edges.size(), true); + + // Collect all the dominance properties info, before invalidating + // the underlying DT. + for (const auto &[Idx, Edge] : enumerate(Edges)) { + // Update dominator information. + BasicBlockT *Succ = Edge.ToBB; + auto *SuccDTNode = DT->getNode(Succ); + + for (BasicBlockT *PredBB : predecessors(Succ)) { + if (PredBB == Edge.NewBB) + continue; + // If we are in this situation: + // FromBB1 FromBB2 + // + + + // + + + + + // + + + + + // ... Split1 Split2 ...
+ // + + + // + + + // + + // Succ + // Instead of checking the domiance property with Split2, we check it + // with FromBB2 since Split2 is still unknown of the underlying DT + // structure. + if (NewBBs.contains(PredBB)) { + assert(pred_size(PredBB) == 1 && "A basic block resulting from a " + "critical edge split has more " + "than one predecessor!"); + PredBB = *pred_begin(PredBB); + } + if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) { + IsNewIDom[Idx] = false; + break; + } + } + } + + // Now, update DT with the collected dominance properties info. + for (const auto &[Idx, Edge] : enumerate(Edges)) { + // We know FromBB dominates NewBB. + auto *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything. + if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + } +} + +// Post dominator tree is different, the new basic block in critical edge +// may become the new root. +template <typename DerivedT, typename DomTreeT, typename PostDomTreeT> +void GenericDomTreeUpdater<DerivedT, DomTreeT, PostDomTreeT>:: + splitPDTCriticalEdges(ArrayRef<CriticalEdge> Edges) { + // Bail out early if there is nothing to do. 
+ if (!PDT || Edges.empty()) + return; + + std::vector<UpdateT> Updates; + for (const auto &Edge : Edges) { + Updates.push_back({PostDomTreeT::Insert, Edge.FromBB, Edge.NewBB}); + Updates.push_back({PostDomTreeT::Insert, Edge.NewBB, Edge.ToBB}); + if (!llvm::is_contained(successors(Edge.FromBB), Edge.ToBB)) + Updates.push_back({PostDomTreeT::Delete, Edge.FromBB, Edge.ToBB}); + } + PDT->applyUpdates(Updates); +} + } // namespace llvm #endif // LLVM_ANALYSIS_GENERICDOMTREEUPDATERIMPL_H diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 5d992faf99d2..e8041e22b031 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -50,9 +50,16 @@ enum class RecurKind { FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum). IAnyOf, ///< Any_of reduction with select(icmp(),x,y) where one of (x,y) is ///< loop invariant, and both x and y are integer type. - FAnyOf ///< Any_of reduction with select(fcmp(),x,y) where one of (x,y) is + FAnyOf, ///< Any_of reduction with select(fcmp(),x,y) where one of (x,y) is ///< loop invariant, and both x and y are integer type. - // TODO: Any_of reduction need not be restricted to integer type only. + IFindLastIV, ///< FindLast reduction with select(icmp(),x,y) where one of + ///< (x,y) is increasing loop induction, and both x and y are + ///< integer type. + FFindLastIV ///< FindLast reduction with select(fcmp(),x,y) where one of (x,y) + ///< is increasing loop induction, and both x and y are integer + ///< type. + // TODO: Any_of and FindLast reduction need not be restricted to integer type + // only. }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -124,7 +131,7 @@ public: /// the returned struct. 
static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, - FastMathFlags FuncFMF); + FastMathFlags FuncFMF, ScalarEvolution *SE); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -151,6 +158,16 @@ public: static InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev); + /// Returns a struct describing whether the instruction is either a + /// Select(ICmp(A, B), X, Y), or + /// Select(FCmp(A, B), X, Y) + /// where one of (X, Y) is an increasing loop induction variable, and the + /// other is a PHI value. + // TODO: Support non-monotonic variable. FindLast does not need be restricted + // to increasing loop induction variables. + static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, + Instruction *I, ScalarEvolution &SE); + /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); @@ -236,10 +253,25 @@ public: return Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf; } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is increasing loop induction. + static bool isFindLastIVRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::IFindLastIV || Kind == RecurKind::FFindLastIV; + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } + /// Returns the sentinel value for FindLastIV recurrences to replace the start + /// value. 
+ Value *getSentinelValue() const { + assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind"); + Type *Ty = StartValue->getType(); + return ConstantInt::get(Ty, + APInt::getSignedMinValue(Ty->getIntegerBitWidth())); + } + /// Returns a reference to the instructions used for type-promoting the /// recurrence. const SmallPtrSet<Instruction *, 8> &getCastInsts() const { return CastInsts; } diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index b46124a4ed0d..215139caef69 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -117,6 +117,12 @@ public: /// which is lower overhead and more direct than maintaining this metadata. /// Returns true if memprof metadata attached, false if not (attribute added). bool buildAndAttachMIBMetadata(CallBase *CI); + + /// Add an attribute for the given allocation type to the call instruction. + /// If hinted by reporting is enabled, a message is emitted with the given + /// descriptor used to identify the category of single allocation type. + void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, + StringRef Descriptor); }; /// Helper class to iterate through stack ids in both metadata (memprof MIB and diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index 055feceefb05..b8e08f4b7842 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -260,23 +260,32 @@ private: MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs); void fixupDefs(const SmallVectorImpl<WeakVH> &); - // Clone all uses and defs from BB to NewBB given a 1:1 map of all - // instructions and blocks cloned, and a map of MemoryPhi : Definition - // (MemoryAccess Phi or Def). 
VMap maps old instructions to cloned - // instructions and old blocks to cloned blocks. MPhiMap, is created in the - // caller of this private method, and maps existing MemoryPhis to new - // definitions that new MemoryAccesses must point to. These definitions may - // not necessarily be MemoryPhis themselves, they may be MemoryDefs. As such, - // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses - // may be MemoryPhis or MemoryDefs and not MemoryUses. - // If CloneWasSimplified = true, the clone was exact. Otherwise, assume that - // the clone involved simplifications that may have: (1) turned a MemoryUse - // into an instruction that MemorySSA has no representation for, or (2) turned - // a MemoryDef into a MemoryUse or an instruction that MemorySSA has no - // representation for. No other cases are supported. + /// Clone all uses and defs from BB to NewBB given a 1:1 map of all + /// instructions and blocks cloned, and a map of MemoryPhi : Definition + /// (MemoryAccess Phi or Def). + /// + /// \param VMap Maps old instructions to cloned instructions and old blocks + /// to cloned blocks + /// \param MPhiMap, is created in the caller of this private method, and maps + /// existing MemoryPhis to new definitions that new MemoryAccesses + /// must point to. These definitions may not necessarily be MemoryPhis + /// themselves, they may be MemoryDefs. As such, the map is between + /// MemoryPhis and MemoryAccesses, where the MemoryAccesses may be + /// MemoryPhis or MemoryDefs and not MemoryUses. + /// \param IsInClonedRegion Determines whether a basic block was cloned. + /// References to accesses outside the cloned region will not be + /// remapped. + /// \param CloneWasSimplified If false, the clone was exact. 
Otherwise, + /// assume that the clone involved simplifications that may have: + /// (1) turned a MemoryUse into an instruction that MemorySSA has no + /// representation for, or (2) turned a MemoryDef into a MemoryUse or + /// an instruction that MemorySSA has no representation for. No other + /// cases are supported. void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, + function_ref<bool(BasicBlock *)> IsInClonedRegion, bool CloneWasSimplified = false); + template <typename Iter> void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd, diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h index bbe2741f44fc..c9d3874e7dd9 100644 --- a/llvm/include/llvm/Analysis/PtrUseVisitor.h +++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -64,6 +64,9 @@ public: /// Is the pointer escaped at some point? bool isEscaped() const { return EscapedInfo != nullptr; } + /// Is the pointer escaped into a read-only nocapture call at some point? + bool isEscapedReadOnly() const { return EscapedReadOnly != nullptr; } + /// Get the instruction causing the visit to abort. /// \returns a pointer to the instruction causing the abort if one is /// available; otherwise returns null. @@ -74,6 +77,10 @@ public: /// is available; otherwise returns null. Instruction *getEscapingInst() const { return EscapedInfo; } + /// Get the instruction causing the pointer to escape which is a read-only + /// nocapture call. + Instruction *getEscapedReadOnlyInst() const { return EscapedReadOnly; } + /// Mark the visit as aborted. Intended for use in a void return. /// \param I The instruction which caused the visit to abort, if available. void setAborted(Instruction *I) { @@ -88,6 +95,12 @@ public: EscapedInfo = I; } + /// Mark the pointer as escaped into a readonly-nocapture call. 
+ void setEscapedReadOnly(Instruction *I) { + assert(I && "Expected a valid pointer in setEscapedReadOnly"); + EscapedReadOnly = I; + } + /// Mark the pointer as escaped, and the visit as aborted. Intended /// for use in a void return. /// \param I The instruction which both escapes the pointer and aborts the @@ -100,6 +113,7 @@ public: private: Instruction *AbortedInfo = nullptr; Instruction *EscapedInfo = nullptr; + Instruction *EscapedReadOnly = nullptr; }; protected: diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index b20c6a13cb6b..7879622473ad 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -241,7 +241,7 @@ public: virtual bool isAlwaysTrue() const = 0; /// Returns true if this predicate implies \p N. - virtual bool implies(const SCEVPredicate *N) const = 0; + virtual bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const = 0; /// Prints a textual representation of this predicate with an indentation of /// \p Depth. @@ -286,7 +286,7 @@ public: const SCEV *LHS, const SCEV *RHS); /// Implementation of the SCEVPredicate interface - bool implies(const SCEVPredicate *N) const override; + bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; @@ -393,7 +393,7 @@ public: /// Implementation of the SCEVPredicate interface const SCEVAddRecExpr *getExpr() const; - bool implies(const SCEVPredicate *N) const override; + bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; @@ -418,16 +418,17 @@ private: SmallVector<const SCEVPredicate *, 16> Preds; /// Adds a predicate to this union. 
- void add(const SCEVPredicate *N); + void add(const SCEVPredicate *N, ScalarEvolution &SE); public: - SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds); + SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds, + ScalarEvolution &SE); ArrayRef<const SCEVPredicate *> getPredicates() const { return Preds; } /// Implementation of the SCEVPredicate interface bool isAlwaysTrue() const override; - bool implies(const SCEVPredicate *N) const override; + bool implies(const SCEVPredicate *N, ScalarEvolution &SE) const override; void print(raw_ostream &OS, unsigned Depth) const override; /// We estimate the complexity of a union predicate as the size number of @@ -1780,6 +1781,10 @@ private: /// V. const SCEV *getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops); + /// Returns SCEV for the first operand of a phi if all phi operands have + /// identical opcodes and operands. + const SCEV *createNodeForPHIWithIdenticalOperands(PHINode *PN); + /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h new file mode 100644 index 000000000000..900f6d0fd05a --- /dev/null +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -0,0 +1,153 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a simple and efficient mechanism for performing general +// tree-based pattern matches on SCEVs, based on LLVM's IR pattern matchers. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONPATTERNMATCH_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONPATTERNMATCH_H + +#include "llvm/Analysis/ScalarEvolutionExpressions.h" + +namespace llvm { +namespace SCEVPatternMatch { + +template <typename Val, typename Pattern> +bool match(const SCEV *S, const Pattern &P) { + return P.match(S); +} + +template <typename Predicate> struct cst_pred_ty : public Predicate { + bool match(const SCEV *S) { + assert((isa<SCEVCouldNotCompute>(S) || !S->getType()->isVectorTy()) && + "no vector types expected from SCEVs"); + auto *C = dyn_cast<SCEVConstant>(S); + return C && this->isValue(C->getAPInt()); + } +}; + +struct is_zero { + bool isValue(const APInt &C) { return C.isZero(); } +}; +/// Match an integer 0. +inline cst_pred_ty<is_zero> m_scev_Zero() { return cst_pred_ty<is_zero>(); } + +struct is_one { + bool isValue(const APInt &C) { return C.isOne(); } +}; +/// Match an integer 1. +inline cst_pred_ty<is_one> m_scev_One() { return cst_pred_ty<is_one>(); } + +struct is_all_ones { + bool isValue(const APInt &C) { return C.isAllOnes(); } +}; +/// Match an integer with all bits set. +inline cst_pred_ty<is_all_ones> m_scev_AllOnes() { + return cst_pred_ty<is_all_ones>(); +} + +template <typename Class> struct class_match { + template <typename ITy> bool match(ITy *V) const { return isa<Class>(V); } +}; + +template <typename Class> struct bind_ty { + Class *&VR; + + bind_ty(Class *&V) : VR(V) {} + + template <typename ITy> bool match(ITy *V) const { + if (auto *CV = dyn_cast<Class>(V)) { + VR = CV; + return true; + } + return false; + } +}; + +/// Match a SCEV, capturing it if we match. 
+inline bind_ty<const SCEV> m_SCEV(const SCEV *&V) { return V; } +inline bind_ty<const SCEVConstant> m_SCEVConstant(const SCEVConstant *&V) { + return V; +} +inline bind_ty<const SCEVUnknown> m_SCEVUnknown(const SCEVUnknown *&V) { + return V; +} + +/// Match a specified const SCEV *. +struct specificscev_ty { + const SCEV *Expr; + + specificscev_ty(const SCEV *Expr) : Expr(Expr) {} + + template <typename ITy> bool match(ITy *S) { return S == Expr; } +}; + +/// Match if we have a specific specified SCEV. +inline specificscev_ty m_Specific(const SCEV *S) { return S; } + +/// Match a unary SCEV. +template <typename SCEVTy, typename Op0_t> struct SCEVUnaryExpr_match { + Op0_t Op0; + + SCEVUnaryExpr_match(Op0_t Op0) : Op0(Op0) {} + + bool match(const SCEV *S) { + auto *E = dyn_cast<SCEVTy>(S); + return E && E->getNumOperands() == 1 && Op0.match(E->getOperand(0)); + } +}; + +template <typename SCEVTy, typename Op0_t> +inline SCEVUnaryExpr_match<SCEVTy, Op0_t> m_scev_Unary(const Op0_t &Op0) { + return SCEVUnaryExpr_match<SCEVTy, Op0_t>(Op0); +} + +template <typename Op0_t> +inline SCEVUnaryExpr_match<SCEVSignExtendExpr, Op0_t> +m_scev_SExt(const Op0_t &Op0) { + return m_scev_Unary<SCEVSignExtendExpr>(Op0); +} + +template <typename Op0_t> +inline SCEVUnaryExpr_match<SCEVZeroExtendExpr, Op0_t> +m_scev_ZExt(const Op0_t &Op0) { + return m_scev_Unary<SCEVZeroExtendExpr>(Op0); +} + +/// Match a binary SCEV. 
+template <typename SCEVTy, typename Op0_t, typename Op1_t> +struct SCEVBinaryExpr_match { + Op0_t Op0; + Op1_t Op1; + + SCEVBinaryExpr_match(Op0_t Op0, Op1_t Op1) : Op0(Op0), Op1(Op1) {} + + bool match(const SCEV *S) { + auto *E = dyn_cast<SCEVTy>(S); + return E && E->getNumOperands() == 2 && Op0.match(E->getOperand(0)) && + Op1.match(E->getOperand(1)); + } +}; + +template <typename SCEVTy, typename Op0_t, typename Op1_t> +inline SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t> +m_scev_Binary(const Op0_t &Op0, const Op1_t &Op1) { + return SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t>(Op0, Op1); +} + +template <typename Op0_t, typename Op1_t> +inline SCEVBinaryExpr_match<SCEVAddExpr, Op0_t, Op1_t> +m_scev_Add(const Op0_t &Op0, const Op1_t &Op1) { + return m_scev_Binary<SCEVAddExpr>(Op0, Op1); +} + +} // namespace SCEVPatternMatch +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 325c9cd9900b..f51d2bb9d50a 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -10,7 +10,9 @@ #define LLVM_ANALYSIS_TARGETLIBRARYINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" @@ -565,6 +567,16 @@ public: /// \copydoc TargetLibraryInfoImpl::getSizeTSize() unsigned getSizeTSize(const Module &M) const { return Impl->getSizeTSize(M); } + /// Returns an IntegerType corresponding to size_t. + IntegerType *getSizeTType(const Module &M) const { + return IntegerType::get(M.getContext(), getSizeTSize(M)); + } + + /// Returns a constant materialized as a size_t type. 
+ ConstantInt *getAsSizeT(uint64_t V, const Module &M) const { + return ConstantInt::get(getSizeTType(M), V); + } + /// \copydoc TargetLibraryInfoImpl::getIntSize() unsigned getIntSize() const { return Impl->getIntSize(); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index aa530b54c5c6..752313ab1585 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -335,12 +335,10 @@ public: /// chain of loads or stores within same block) operations set when lowered. /// \p AccessTy is the type of the loads/stores that will ultimately use the /// \p Ptrs. - InstructionCost - getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base, - const PointersChainInfo &Info, Type *AccessTy, - TargetCostKind CostKind = TTI::TCK_RecipThroughput - - ) const; + InstructionCost getPointersChainCost( + ArrayRef<const Value *> Ptrs, const Value *Base, + const PointersChainInfo &Info, Type *AccessTy, + TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// \returns A value by which our inlining threshold should be multiplied. /// This is primarily used to bump up the inlining threshold wholesale on @@ -901,14 +899,20 @@ public: bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; + /// Identifies if the vector form of the intrinsic has a scalar operand. bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const; /// Identifies if the vector form of the intrinsic is overloaded on the type /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is /// -1. - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const; + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const; + + /// Identifies if the vector form of the intrinsic that returns a struct is + /// overloaded at the struct element index \p RetIdx. 
+ bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -2004,8 +2008,11 @@ public: virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) = 0; virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) = 0; - virtual bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) = 0; + virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) = 0; + virtual bool + isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) = 0; virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind, @@ -2582,9 +2589,14 @@ public: return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) override { - return Impl.isVectorIntrinsicWithOverloadTypeAtArg(ID, ScalarOpdIdx); + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) override { + return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) override { + return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx); } InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 48ebffff8cbf..9c74b2a0c31d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -396,9 +396,14 @@ public: return false; } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const { - return ScalarOpdIdx == -1; + bool 
isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const { + return OpdIdx == -1; + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const { + return RetIdx == 0; } InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h index 36dd39c033aa..e70f35174e4c 100644 --- a/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -29,7 +29,15 @@ class MemoryLocation; /// A simple AA result that uses TBAA metadata to answer queries. class TypeBasedAAResult : public AAResultBase { + /// True if type sanitizer is enabled. When TypeSanitizer is used, don't use + /// TBAA information for alias analysis as this might cause us to remove + /// memory accesses that we need to verify at runtime. + bool UsingTypeSanitizer; + public: + TypeBasedAAResult(bool UsingTypeSanitizer) + : UsingTypeSanitizer(UsingTypeSanitizer) {} + /// Handle invalidation events from the new pass manager. /// /// By definition, this result is stateless and so remains valid. @@ -52,6 +60,10 @@ public: private: bool Aliases(const MDNode *A, const MDNode *B) const; + + /// Returns true if TBAA metadata should be used, that is if TBAA is enabled + /// and type sanitizer is not used. + bool shouldUseTBAA() const; }; /// Analysis pass providing a never-invalidated alias analysis result. 
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index c1016dd7bddd..b72efac0a488 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -18,6 +18,7 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/IR/Module.h" #include "llvm/IR/VFABIDemangler.h" +#include "llvm/IR/VectorTypeUtils.h" #include "llvm/Support/CheckedArithmetic.h" namespace llvm { @@ -127,28 +128,29 @@ namespace Intrinsic { typedef unsigned ID; } -/// A helper function for converting Scalar types to vector types. If -/// the incoming type is void, we return void. If the EC represents a -/// scalar, we return the scalar type. -inline Type *ToVectorTy(Type *Scalar, ElementCount EC) { - if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar()) - return Scalar; - return VectorType::get(Scalar, EC); -} - -inline Type *ToVectorTy(Type *Scalar, unsigned VF) { - return ToVectorTy(Scalar, ElementCount::getFixed(VF)); -} - /// Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all scalars /// for the scalar form of the intrinsic and all vectors (or scalars handled by /// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic. +/// +/// Note: isTriviallyVectorizable implies isTriviallyScalarizable. bool isTriviallyVectorizable(Intrinsic::ID ID); +/// Identify if the intrinsic is trivially scalarizable. +/// This method returns true following the same predicates of +/// isTriviallyVectorizable. +/// +/// Note: There are intrinsics where implementing vectorization for the +/// intrinsic is redundant, but we want to implement scalarization of the +/// vector. To prevent the requirement that an intrinsic also implements +/// vectorization we provide this separate function.
+bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI); + /// Identifies if the vector form of the intrinsic has a scalar operand. -bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, - unsigned ScalarOpdIdx); +/// \p TTI is used to consider target specific intrinsics, if no target specific +/// intrinsics will be considered then it is appropriate to pass in nullptr. +bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, + const TargetTransformInfo *TTI); /// Identifies if the vector form of the intrinsic is overloaded on the type of /// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1. @@ -158,9 +160,11 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI); /// Identifies if the vector form of the intrinsic that returns a struct is -/// overloaded at the struct element index \p RetIdx. -bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, - int RetIdx); +/// overloaded at the struct element index \p RetIdx. \p TTI is used to +/// consider target specific intrinsics, if no target specific intrinsics +/// will be considered then it is appropriate to pass in nullptr. +bool isVectorIntrinsicWithStructReturnOverloadAtField( + Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI); /// Returns intrinsic ID for call.
/// For the input call instruction it finds mapping intrinsic and returns diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 1ef8b8ffc396..8b195b028783 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -91,12 +91,15 @@ namespace llvm { } bool operator<(const ValID &RHS) const { - assert(Kind == RHS.Kind && "Comparing ValIDs of different kinds"); + assert((((Kind == t_LocalID || Kind == t_LocalName) && + (RHS.Kind == t_LocalID || RHS.Kind == t_LocalName)) || + ((Kind == t_GlobalID || Kind == t_GlobalName) && + (RHS.Kind == t_GlobalID || RHS.Kind == t_GlobalName))) && + "Comparing ValIDs of different kinds"); + if (Kind != RHS.Kind) + return Kind < RHS.Kind; if (Kind == t_LocalID || Kind == t_GlobalID) return UIntVal < RHS.UIntVal; - assert((Kind == t_LocalName || Kind == t_GlobalName || - Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && - "Ordering not defined for this ValID kind yet"); return StrVal < RHS.StrVal; } }; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index fd32a6ec1965..8abacf1b546a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -127,9 +127,11 @@ enum { // Versioning enum { EV_NONE = 0, EV_CURRENT = 1 }; -// Machine architectures -// See current registered ELF machine architectures at: -// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html +// Machine architectures. +// At the time of writing, the list of registered machine architectures is +// at https://groups.google.com/g/generic-abi/c/0kORSDcyhTE/m/ZRf_PvcHAAAJ +// Please refer to https://groups.google.com/g/generic-abi for any further +// updates. 
enum { EM_NONE = 0, // No machine EM_M32 = 1, // AT&T WE 32100 @@ -627,6 +629,8 @@ enum { EF_HEXAGON_MACH_V71 = 0x00000071, // Hexagon V71 EF_HEXAGON_MACH_V71T = 0x00008071, // Hexagon V71T EF_HEXAGON_MACH_V73 = 0x00000073, // Hexagon V73 + EF_HEXAGON_MACH_V75 = 0x00000075, // Hexagon V75 + EF_HEXAGON_MACH_V79 = 0x00000079, // Hexagon V79 EF_HEXAGON_MACH = 0x000003ff, // Hexagon V.. // Highest ISA version flags @@ -647,6 +651,7 @@ enum { EF_HEXAGON_ISA_V71 = 0x00000071, // Hexagon V71 ISA EF_HEXAGON_ISA_V73 = 0x00000073, // Hexagon V73 ISA EF_HEXAGON_ISA_V75 = 0x00000075, // Hexagon V75 ISA + EF_HEXAGON_ISA_V79 = 0x00000079, // Hexagon V79 ISA EF_HEXAGON_ISA = 0x000003ff, // Hexagon V.. ISA }; @@ -1724,6 +1729,7 @@ enum : unsigned { NT_ARM_ZA = 0x40c, NT_ARM_ZT = 0x40d, NT_ARM_FPMR = 0x40e, + NT_ARM_GCS = 0x410, NT_FILE = 0x46494c45, NT_PRXFPREG = 0x46e62b7f, diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index 65a15c2e1562..ac9a089e853a 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -60,3 +60,68 @@ ELF_RELOC(R_RISCV_TLSDESC_HI20, 62) ELF_RELOC(R_RISCV_TLSDESC_LOAD_LO12, 63) ELF_RELOC(R_RISCV_TLSDESC_ADD_LO12, 64) ELF_RELOC(R_RISCV_TLSDESC_CALL, 65) +ELF_RELOC(R_RISCV_VENDOR, 191) +ELF_RELOC(R_RISCV_CUSTOM192, 192) +ELF_RELOC(R_RISCV_CUSTOM193, 193) +ELF_RELOC(R_RISCV_CUSTOM194, 194) +ELF_RELOC(R_RISCV_CUSTOM195, 195) +ELF_RELOC(R_RISCV_CUSTOM196, 196) +ELF_RELOC(R_RISCV_CUSTOM197, 197) +ELF_RELOC(R_RISCV_CUSTOM198, 198) +ELF_RELOC(R_RISCV_CUSTOM199, 199) +ELF_RELOC(R_RISCV_CUSTOM200, 200) +ELF_RELOC(R_RISCV_CUSTOM201, 201) +ELF_RELOC(R_RISCV_CUSTOM202, 202) +ELF_RELOC(R_RISCV_CUSTOM203, 203) +ELF_RELOC(R_RISCV_CUSTOM204, 204) +ELF_RELOC(R_RISCV_CUSTOM205, 205) +ELF_RELOC(R_RISCV_CUSTOM206, 206) +ELF_RELOC(R_RISCV_CUSTOM207, 207) +ELF_RELOC(R_RISCV_CUSTOM208, 208) +ELF_RELOC(R_RISCV_CUSTOM209, 209) 
+ELF_RELOC(R_RISCV_CUSTOM210, 210) +ELF_RELOC(R_RISCV_CUSTOM211, 211) +ELF_RELOC(R_RISCV_CUSTOM212, 212) +ELF_RELOC(R_RISCV_CUSTOM213, 213) +ELF_RELOC(R_RISCV_CUSTOM214, 214) +ELF_RELOC(R_RISCV_CUSTOM215, 215) +ELF_RELOC(R_RISCV_CUSTOM216, 216) +ELF_RELOC(R_RISCV_CUSTOM217, 217) +ELF_RELOC(R_RISCV_CUSTOM218, 218) +ELF_RELOC(R_RISCV_CUSTOM219, 219) +ELF_RELOC(R_RISCV_CUSTOM220, 220) +ELF_RELOC(R_RISCV_CUSTOM221, 221) +ELF_RELOC(R_RISCV_CUSTOM222, 222) +ELF_RELOC(R_RISCV_CUSTOM223, 223) +ELF_RELOC(R_RISCV_CUSTOM224, 224) +ELF_RELOC(R_RISCV_CUSTOM225, 225) +ELF_RELOC(R_RISCV_CUSTOM226, 226) +ELF_RELOC(R_RISCV_CUSTOM227, 227) +ELF_RELOC(R_RISCV_CUSTOM228, 228) +ELF_RELOC(R_RISCV_CUSTOM229, 229) +ELF_RELOC(R_RISCV_CUSTOM230, 230) +ELF_RELOC(R_RISCV_CUSTOM231, 231) +ELF_RELOC(R_RISCV_CUSTOM232, 232) +ELF_RELOC(R_RISCV_CUSTOM233, 233) +ELF_RELOC(R_RISCV_CUSTOM234, 234) +ELF_RELOC(R_RISCV_CUSTOM235, 235) +ELF_RELOC(R_RISCV_CUSTOM236, 236) +ELF_RELOC(R_RISCV_CUSTOM237, 237) +ELF_RELOC(R_RISCV_CUSTOM238, 238) +ELF_RELOC(R_RISCV_CUSTOM239, 239) +ELF_RELOC(R_RISCV_CUSTOM240, 240) +ELF_RELOC(R_RISCV_CUSTOM241, 241) +ELF_RELOC(R_RISCV_CUSTOM242, 242) +ELF_RELOC(R_RISCV_CUSTOM243, 243) +ELF_RELOC(R_RISCV_CUSTOM244, 244) +ELF_RELOC(R_RISCV_CUSTOM245, 245) +ELF_RELOC(R_RISCV_CUSTOM246, 246) +ELF_RELOC(R_RISCV_CUSTOM247, 247) +ELF_RELOC(R_RISCV_CUSTOM248, 248) +ELF_RELOC(R_RISCV_CUSTOM249, 249) +ELF_RELOC(R_RISCV_CUSTOM250, 250) +ELF_RELOC(R_RISCV_CUSTOM251, 251) +ELF_RELOC(R_RISCV_CUSTOM252, 252) +ELF_RELOC(R_RISCV_CUSTOM253, 253) +ELF_RELOC(R_RISCV_CUSTOM254, 254) +ELF_RELOC(R_RISCV_CUSTOM255, 255) diff --git a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def index 5226da3e8412..722a70ff67a9 100644 --- a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def +++ b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def @@ -85,6 +85,10 @@ HANDLE_MDMP_STREAM_TYPE(0xFACECCCC, FacebookAppStateLog) 
HANDLE_MDMP_STREAM_TYPE(0xFACEDEAD, FacebookAbortReason) HANDLE_MDMP_STREAM_TYPE(0xFACEE000, FacebookThreadName) +// LLDB specific stream types +// Ascii for 'LLDB' +HANDLE_MDMP_STREAM_TYPE(0x4C4C4442, LLDBGenerated) + HANDLE_MDMP_ARCH(0x0000, X86) // PROCESSOR_ARCHITECTURE_INTEL HANDLE_MDMP_ARCH(0x0001, MIPS) // PROCESSOR_ARCHITECTURE_MIPS HANDLE_MDMP_ARCH(0x0002, Alpha) // PROCESSOR_ARCHITECTURE_ALPHA diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index bbcd8a4f29ae..b48976769c0c 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -333,10 +333,33 @@ enum CFileLangId : uint8_t { TB_CPLUSPLUS = 9 ///< C++ language. }; +// XCOFF specific CPU IDs, defined in AIX OS header: `/usr/include/aouthdr.h`. enum CFileCpuId : uint8_t { - TCPU_PPC64 = 2, ///< PowerPC common architecture 64-bit mode. - TCPU_COM = 3, ///< POWER and PowerPC architecture common. - TCPU_970 = 19 ///< PPC970 - PowerPC 64-bit architecture. + TCPU_INVALID = 0, ///< Invalid id - assumes POWER for old objects. + TCPU_PPC = 1, ///< PowerPC common architecture 32-bit mode. + TCPU_PPC64 = 2, ///< PowerPC common architecture 64-bit mode. + TCPU_COM = 3, ///< POWER and PowerPC architecture common. + TCPU_PWR = 4, ///< POWER common architecture objects. + TCPU_ANY = 5, ///< Mixture of any incompatible POWER + ///< and PowerPC architecture implementations. + TCPU_601 = 6, ///< 601 implementation of PowerPC architecture. + TCPU_603 = 7, ///< 603 implementation of PowerPC architecture. + TCPU_604 = 8, ///< 604 implementation of PowerPC architecture. + + // The following are PowerPC 64-bit architectures. + TCPU_620 = 16, + TCPU_A35 = 17, + TCPU_PWR5 = 18, + TCPU_970 = 19, + TCPU_PWR6 = 20, + TCPU_PWR5X = 22, + TCPU_PWR6E = 23, + TCPU_PWR7 = 24, + TCPU_PWR8 = 25, + TCPU_PWR9 = 26, + TCPU_PWR10 = 27, + + TCPU_PWRX = 224 ///< RS2 implementation of POWER architecture.
}; enum SymbolAuxType : uint8_t { @@ -350,6 +373,7 @@ enum SymbolAuxType : uint8_t { StringRef getMappingClassString(XCOFF::StorageMappingClass SMC); StringRef getRelocationTypeString(XCOFF::RelocationType Type); +StringRef getTCPUString(XCOFF::CFileCpuId TCPU); Expected<SmallString<32>> parseParmsType(uint32_t Value, unsigned FixedParmsNum, unsigned FloatingParmsNum); Expected<SmallString<32>> parseParmsTypeWithVecInfo(uint32_t Value, @@ -468,6 +492,7 @@ enum ExtendedTBTableFlag : uint8_t { StringRef getNameForTracebackTableLanguageId(TracebackTable::LanguageID LangId); SmallString<32> getExtendedTBTableFlagString(uint8_t Flag); +XCOFF::CFileCpuId getCpuID(StringRef CPU); struct CsectProperties { CsectProperties(StorageMappingClass SMC, SymbolType ST) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 41909a8fc1d5..21fd27d9838d 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -787,6 +787,7 @@ enum AttributeKindCodes { ATTR_KIND_CORO_ELIDE_SAFE = 98, ATTR_KIND_NO_EXT = 99, ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, + ATTR_KIND_SANITIZE_TYPE = 101, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index f46f07122329..c9f142d64ae9 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -259,6 +259,33 @@ private: return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost; } + /// Checks if the provided \p Mask is a splat mask, i.e. it contains only -1 + /// or a single non -1 index value, and that index occurs at least twice. + /// So, mask <0, -1, -1, -1> is not considered splat (it is just identity), + /// same for <-1, 0, -1, -1> (just a slide), while <2, -1, 2, -1> is a splat + /// with \p Index=2.
+ static bool isSplatMask(ArrayRef<int> Mask, unsigned NumSrcElts, int &Index) { + // Check that the broadcast index meets at least twice. + bool IsCompared = false; + if (int SplatIdx = PoisonMaskElem; + all_of(enumerate(Mask), [&](const auto &P) { + if (P.value() == PoisonMaskElem) + return P.index() != Mask.size() - 1 || IsCompared; + if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2) + return false; + if (SplatIdx == PoisonMaskElem) { + SplatIdx = P.value(); + return P.index() != Mask.size() - 1; + } + IsCompared = true; + return SplatIdx == P.value(); + })) { + Index = SplatIdx; + return true; + } + return false; + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -819,9 +846,14 @@ public: return false; } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const { - return ScalarOpdIdx == -1; + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const { + return OpdIdx == -1; + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const { + return RetIdx == 0; } /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. 
@@ -1009,17 +1041,20 @@ public: return Kind; int NumSrcElts = Ty->getElementCount().getKnownMinValue(); switch (Kind) { - case TTI::SK_PermuteSingleSrc: + case TTI::SK_PermuteSingleSrc: { if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts)) return TTI::SK_Reverse; if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts)) return TTI::SK_Broadcast; + if (isSplatMask(Mask, NumSrcElts, Index)) + return TTI::SK_Broadcast; if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) && (Index + Mask.size()) <= (size_t)NumSrcElts) { SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size()); return TTI::SK_ExtractSubvector; } break; + } case TTI::SK_PermuteTwoSrc: { int NumSubElts; if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( @@ -1935,6 +1970,8 @@ public: return Cost; } + case Intrinsic::experimental_vector_match: + return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); } // Assume that we need to scalarize this intrinsic.) @@ -2190,6 +2227,35 @@ public: case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); + case Intrinsic::experimental_vector_match: { + auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]); + auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]); + unsigned SearchSize = NeedleTy->getNumElements(); + + // If we're not expanding the intrinsic then we assume this is cheap to + // implement. + EVT SearchVT = getTLI()->getValueType(DL, SearchTy); + if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) + return getTypeLegalizationCost(RetTy).first; + + // Approximate the cost based on the expansion code in + // SelectionDAGBuilder. 
+ InstructionCost Cost = 0; + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy, + CostKind, 1, nullptr, nullptr); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy, + CostKind, 0, nullptr, nullptr); + Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt, + CostKind, 0, nullptr); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy, + CmpInst::ICMP_EQ, CostKind); + Cost += + thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind); + Cost *= SearchSize; + Cost += + thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); + return Cost; + } case Intrinsic::abs: ISD = ISD::ABS; break; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 55c3b72c8e02..94e36e412b0c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -172,17 +172,18 @@ public: /// Set the register bank of \p Reg. /// Does nothing if the RegBank is null. /// This is the counterpart to getRegBank. - void setRegBank(Register Reg, const RegisterBank *RegBank); + void setRegBank(Register Reg, const RegisterBank *RegBank) const; /// If \p MI is COPY, try to combine it. /// Returns true if MI changed. - bool tryCombineCopy(MachineInstr &MI); - bool matchCombineCopy(MachineInstr &MI); - void applyCombineCopy(MachineInstr &MI); + bool tryCombineCopy(MachineInstr &MI) const; + bool matchCombineCopy(MachineInstr &MI) const; + void applyCombineCopy(MachineInstr &MI) const; /// Returns true if \p DefMI precedes \p UseMI or they are the same /// instruction. Both must be in the same basic block. - bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI); + bool isPredecessor(const MachineInstr &DefMI, + const MachineInstr &UseMI) const; /// Returns true if \p DefMI dominates \p UseMI. 
By definition an /// instruction dominates itself. @@ -190,40 +191,50 @@ public: /// If we haven't been provided with a MachineDominatorTree during /// construction, this function returns a conservative result that tracks just /// a single basic block. - bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI); + bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const; /// If \p MI is extend that consumes the result of a load, try to combine it. /// Returns true if MI changed. - bool tryCombineExtendingLoads(MachineInstr &MI); - bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); - void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); + bool tryCombineExtendingLoads(MachineInstr &MI) const; + bool matchCombineExtendingLoads(MachineInstr &MI, + PreferredTuple &MatchInfo) const; + void applyCombineExtendingLoads(MachineInstr &MI, + PreferredTuple &MatchInfo) const; /// Match (and (load x), mask) -> zextload x - bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCombineLoadWithAndMask(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed /// load. 
- bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCombineExtractedVectorLoad(MachineInstr &MI, + BuildFnTy &MatchInfo) const; - bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); - void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); + bool matchCombineIndexedLoadStore(MachineInstr &MI, + IndexedLoadStoreMatchInfo &MatchInfo) const; + void applyCombineIndexedLoadStore(MachineInstr &MI, + IndexedLoadStoreMatchInfo &MatchInfo) const; - bool matchSextTruncSextLoad(MachineInstr &MI); - void applySextTruncSextLoad(MachineInstr &MI); + bool matchSextTruncSextLoad(MachineInstr &MI) const; + void applySextTruncSextLoad(MachineInstr &MI) const; /// Match sext_inreg(load p), imm -> sextload p - bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo); - void applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo); + bool matchSextInRegOfLoad(MachineInstr &MI, + std::tuple<Register, unsigned> &MatchInfo) const; + void applySextInRegOfLoad(MachineInstr &MI, + std::tuple<Register, unsigned> &MatchInfo) const; /// Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM /// when their source operands are identical. - bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI); - void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI); + bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const; + void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const; /// If a brcond's true block is not the fallthrough, make it so by inverting /// the condition and swapping operands. 
- bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond); - void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond); + bool matchOptBrCondByInvertingCond(MachineInstr &MI, + MachineInstr *&BrCond) const; + void applyOptBrCondByInvertingCond(MachineInstr &MI, + MachineInstr *&BrCond) const; /// If \p MI is G_CONCAT_VECTORS, try to combine it. /// Returns true if MI changed. @@ -239,21 +250,25 @@ public: /// needed to produce the flattened build_vector. /// /// \pre MI.getOpcode() == G_CONCAT_VECTORS. - bool matchCombineConcatVectors(MachineInstr &MI, SmallVector<Register> &Ops); + bool matchCombineConcatVectors(MachineInstr &MI, + SmallVector<Register> &Ops) const; /// Replace \p MI with a flattened build_vector with \p Ops /// or an implicit_def if \p Ops is empty. - void applyCombineConcatVectors(MachineInstr &MI, SmallVector<Register> &Ops); + void applyCombineConcatVectors(MachineInstr &MI, + SmallVector<Register> &Ops) const; - bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops); + bool matchCombineShuffleConcat(MachineInstr &MI, + SmallVector<Register> &Ops) const; /// Replace \p MI with a flattened build_vector with \p Ops /// or an implicit_def if \p Ops is empty. - void applyCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops); + void applyCombineShuffleConcat(MachineInstr &MI, + SmallVector<Register> &Ops) const; /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS. /// Returns true if MI changed. /// /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. - bool tryCombineShuffleVector(MachineInstr &MI); + bool tryCombineShuffleVector(MachineInstr &MI) const; /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a /// concat_vectors. /// \p Ops will contain the operands needed to produce the flattened @@ -261,12 +276,12 @@ public: /// /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. 
bool matchCombineShuffleVector(MachineInstr &MI, - SmallVectorImpl<Register> &Ops); + SmallVectorImpl<Register> &Ops) const; /// Replace \p MI with a concat_vectors with \p Ops. void applyCombineShuffleVector(MachineInstr &MI, - const ArrayRef<Register> Ops); - bool matchShuffleToExtract(MachineInstr &MI); - void applyShuffleToExtract(MachineInstr &MI); + const ArrayRef<Register> Ops) const; + bool matchShuffleToExtract(MachineInstr &MI) const; + void applyShuffleToExtract(MachineInstr &MI) const; /// Optimize memcpy intrinsics et al, e.g. constant len calls. /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline. @@ -298,101 +313,105 @@ public: /// $addr = G_INDEXED_STORE $val, $base, $offset /// [...] /// $whatever = COPY $addr - bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); + bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0) const; - bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); - void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); + bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const; + void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const; /// Fold (shift (shift base, x), y) -> (shift base (x+y)) - bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); - void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const; + void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const; /// If we have a shift-by-constant of a bitwise logic op that itself has a /// shift-by-constant operand with identical opcode, we may be able to convert /// that into 2 independent shifts followed by the logic op. 
bool matchShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo); + ShiftOfShiftedLogic &MatchInfo) const; void applyShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo); + ShiftOfShiftedLogic &MatchInfo) const; - bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Transform a multiply by a power-of-2 value to a left shift. - bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); - void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); + bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const; + void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const; // Transform a G_SUB with constant on the RHS to G_ADD. - bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const; // Transform a G_SHL with an extended source into a narrower shift if // possible. - bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData); + bool matchCombineShlOfExtend(MachineInstr &MI, + RegisterImmPair &MatchData) const; void applyCombineShlOfExtend(MachineInstr &MI, - const RegisterImmPair &MatchData); + const RegisterImmPair &MatchData) const; /// Fold away a merge of an unmerge of the corresponding values. - bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo); + bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const; /// Reduce a shift by a constant to an unmerge and a shift on a half sized /// type. This will not produce a shift smaller than \p TargetShiftSize. 
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, - unsigned &ShiftVal); - void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal); - bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount); + unsigned &ShiftVal) const; + void applyCombineShiftToUnmerge(MachineInstr &MI, + const unsigned &ShiftVal) const; + bool tryCombineShiftToUnmerge(MachineInstr &MI, + unsigned TargetShiftAmount) const; /// Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z. - bool - matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, - SmallVectorImpl<Register> &Operands); - void - applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, - SmallVectorImpl<Register> &Operands); + bool matchCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) const; + void applyCombineUnmergeMergeToPlainValues( + MachineInstr &MI, SmallVectorImpl<Register> &Operands) const; /// Transform G_UNMERGE Constant -> Constant1, Constant2, ... bool matchCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts); + SmallVectorImpl<APInt> &Csts) const; void applyCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts); + SmallVectorImpl<APInt> &Csts) const; /// Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ... - bool - matchCombineUnmergeUndef(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); + bool matchCombineUnmergeUndef( + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const; /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z. 
- bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); - void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const; + void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const; /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0 - bool matchCombineUnmergeZExtToZExt(MachineInstr &MI); - void applyCombineUnmergeZExtToZExt(MachineInstr &MI); + bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const; + void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const; /// Transform fp_instr(cst) to constant result of the fp operation. - void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst); + void applyCombineConstantFoldFpUnary(MachineInstr &MI, + const ConstantFP *Cst) const; /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. - bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); - void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); + bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const; + void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const; /// Transform PtrToInt(IntToPtr(x)) to x. 
- void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg); + void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const; /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y) - bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, - std::pair<Register, bool> &PtrRegAndCommute); - void applyCombineAddP2IToPtrAdd(MachineInstr &MI, - std::pair<Register, bool> &PtrRegAndCommute); + bool + matchCombineAddP2IToPtrAdd(MachineInstr &MI, + std::pair<Register, bool> &PtrRegAndCommute) const; + void + applyCombineAddP2IToPtrAdd(MachineInstr &MI, + std::pair<Register, bool> &PtrRegAndCommute) const; // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2 - bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst); - void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst); + bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const; + void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const; /// Transform anyext(trunc(x)) to x. - bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); + bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const; /// Transform zext(trunc(x)) to x. - bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg); + bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const; /// Transform trunc (shl x, K) to shl (trunc x), K /// if K < VT.getScalarSizeInBits(). @@ -401,118 +420,121 @@ public: /// if K <= (MidVT.getScalarSizeInBits() - VT.getScalarSizeInBits()) /// MidVT is obtained by finding a legal type between the trunc's src and dst /// types. 
- bool matchCombineTruncOfShift(MachineInstr &MI, - std::pair<MachineInstr *, LLT> &MatchInfo); - void applyCombineTruncOfShift(MachineInstr &MI, - std::pair<MachineInstr *, LLT> &MatchInfo); + bool + matchCombineTruncOfShift(MachineInstr &MI, + std::pair<MachineInstr *, LLT> &MatchInfo) const; + void + applyCombineTruncOfShift(MachineInstr &MI, + std::pair<MachineInstr *, LLT> &MatchInfo) const; /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. - bool matchAnyExplicitUseIsUndef(MachineInstr &MI); + bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const; /// Return true if all register explicit use operands on \p MI are defined by /// a G_IMPLICIT_DEF. - bool matchAllExplicitUsesAreUndef(MachineInstr &MI); + bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const; /// Return true if a G_SHUFFLE_VECTOR instruction \p MI has an undef mask. - bool matchUndefShuffleVectorMask(MachineInstr &MI); + bool matchUndefShuffleVectorMask(MachineInstr &MI) const; /// Return true if a G_STORE instruction \p MI is storing an undef value. - bool matchUndefStore(MachineInstr &MI); + bool matchUndefStore(MachineInstr &MI) const; /// Return true if a G_SELECT instruction \p MI has an undef comparison. - bool matchUndefSelectCmp(MachineInstr &MI); + bool matchUndefSelectCmp(MachineInstr &MI) const; /// Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index. - bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI); + bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const; /// Return true if a G_SELECT instruction \p MI has a constant comparison. If /// true, \p OpIdx will store the operand index of the known selected value. - bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx); + bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const; /// Replace an instruction with a G_FCONSTANT with value \p C. 
- void replaceInstWithFConstant(MachineInstr &MI, double C); + void replaceInstWithFConstant(MachineInstr &MI, double C) const; /// Replace an instruction with an G_FCONSTANT with value \p CFP. - void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP); + void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) const; /// Replace an instruction with a G_CONSTANT with value \p C. - void replaceInstWithConstant(MachineInstr &MI, int64_t C); + void replaceInstWithConstant(MachineInstr &MI, int64_t C) const; /// Replace an instruction with a G_CONSTANT with value \p C. - void replaceInstWithConstant(MachineInstr &MI, APInt C); + void replaceInstWithConstant(MachineInstr &MI, APInt C) const; /// Replace an instruction with a G_IMPLICIT_DEF. - void replaceInstWithUndef(MachineInstr &MI); + void replaceInstWithUndef(MachineInstr &MI) const; /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand. - void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx); + void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const; /// Delete \p MI and replace all of its uses with \p Replacement. - void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement); + void replaceSingleDefInstWithReg(MachineInstr &MI, + Register Replacement) const; /// @brief Replaces the shift amount in \p MI with ShiftAmt % BW /// @param MI - void applyFunnelShiftConstantModulo(MachineInstr &MI); + void applyFunnelShiftConstantModulo(MachineInstr &MI) const; /// Return true if \p MOP1 and \p MOP2 are register operands are defined by /// equivalent instructions. - bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2); + bool matchEqualDefs(const MachineOperand &MOP1, + const MachineOperand &MOP2) const; /// Return true if \p MOP is defined by a G_CONSTANT or splat with a value equal to /// \p C. 
- bool matchConstantOp(const MachineOperand &MOP, int64_t C); + bool matchConstantOp(const MachineOperand &MOP, int64_t C) const; /// Return true if \p MOP is defined by a G_FCONSTANT or splat with a value exactly /// equal to \p C. - bool matchConstantFPOp(const MachineOperand &MOP, double C); + bool matchConstantFPOp(const MachineOperand &MOP, double C) const; /// @brief Checks if constant at \p ConstIdx is larger than \p MI 's bitwidth /// @param ConstIdx Index of the constant - bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx); + bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const; /// Optimize (cond ? x : x) -> x - bool matchSelectSameVal(MachineInstr &MI); + bool matchSelectSameVal(MachineInstr &MI) const; /// Optimize (x op x) -> x - bool matchBinOpSameVal(MachineInstr &MI); + bool matchBinOpSameVal(MachineInstr &MI) const; /// Check if operand \p OpIdx is zero. - bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx); + bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const; /// Check if operand \p OpIdx is undef. - bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx); + bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const; /// Check if operand \p OpIdx is known to be a power of 2. - bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx); + bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, + unsigned OpIdx) const; /// Erase \p MI - void eraseInst(MachineInstr &MI); + void eraseInst(MachineInstr &MI) const; /// Return true if MI is a G_ADD which can be simplified to a G_SUB. 
bool matchSimplifyAddToSub(MachineInstr &MI, - std::tuple<Register, Register> &MatchInfo); + std::tuple<Register, Register> &MatchInfo) const; void applySimplifyAddToSub(MachineInstr &MI, - std::tuple<Register, Register> &MatchInfo); + std::tuple<Register, Register> &MatchInfo) const; /// Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y)) - bool - matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, - InstructionStepsMatchInfo &MatchInfo); + bool matchHoistLogicOpWithSameOpcodeHands( + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const; /// Replace \p MI with a series of instructions described in \p MatchInfo. void applyBuildInstructionSteps(MachineInstr &MI, - InstructionStepsMatchInfo &MatchInfo); + InstructionStepsMatchInfo &MatchInfo) const; /// Match ashr (shl x, C), C -> sext_inreg (C) bool matchAshrShlToSextInreg(MachineInstr &MI, - std::tuple<Register, int64_t> &MatchInfo); + std::tuple<Register, int64_t> &MatchInfo) const; void applyAshShlToSextInreg(MachineInstr &MI, - std::tuple<Register, int64_t> &MatchInfo); + std::tuple<Register, int64_t> &MatchInfo) const; /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 - bool matchOverlappingAnd(MachineInstr &MI, - BuildFnTy &MatchInfo); + bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// \return true if \p MI is a G_AND instruction whose operands are x and y /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.) @@ -520,7 +542,7 @@ public: /// \param [in] MI - The G_AND instruction. /// \param [out] Replacement - A register the G_AND should be replaced with on /// success. - bool matchRedundantAnd(MachineInstr &MI, Register &Replacement); + bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const; /// \return true if \p MI is a G_OR instruction whose operands are x and y /// where x | y == x or x | y == y. (E.g., one of operands is all-zeros @@ -529,42 +551,45 @@ public: /// \param [in] MI - The G_OR instruction. 
/// \param [out] Replacement - A register the G_OR should be replaced with on /// success. - bool matchRedundantOr(MachineInstr &MI, Register &Replacement); + bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const; /// \return true if \p MI is a G_SEXT_INREG that can be erased. - bool matchRedundantSExtInReg(MachineInstr &MI); + bool matchRedundantSExtInReg(MachineInstr &MI) const; /// Combine inverting a result of a compare into the opposite cond code. - bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate); - void applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate); + bool matchNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) const; + void applyNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) const; /// Fold (xor (and x, y), y) -> (and (not x), y) ///{ bool matchXorOfAndWithSameReg(MachineInstr &MI, - std::pair<Register, Register> &MatchInfo); + std::pair<Register, Register> &MatchInfo) const; void applyXorOfAndWithSameReg(MachineInstr &MI, - std::pair<Register, Register> &MatchInfo); + std::pair<Register, Register> &MatchInfo) const; ///} /// Combine G_PTR_ADD with nullptr to G_INTTOPTR - bool matchPtrAddZero(MachineInstr &MI); - void applyPtrAddZero(MachineInstr &MI); + bool matchPtrAddZero(MachineInstr &MI) const; + void applyPtrAddZero(MachineInstr &MI) const; /// Combine G_UREM x, (known power of 2) to an add and bitmasking. - void applySimplifyURemByPow2(MachineInstr &MI); + void applySimplifyURemByPow2(MachineInstr &MI) const; /// Push a binary operator through a select on constants. 
/// /// binop (select cond, K0, K1), K2 -> /// select cond, (binop K0, K2), (binop K1, K2) - bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo); - void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo); + bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const; + void applyFoldBinOpIntoSelect(MachineInstr &MI, + const unsigned &SelectOpNo) const; bool matchCombineInsertVecElts(MachineInstr &MI, - SmallVectorImpl<Register> &MatchInfo); + SmallVectorImpl<Register> &MatchInfo) const; void applyCombineInsertVecElts(MachineInstr &MI, - SmallVectorImpl<Register> &MatchInfo); + SmallVectorImpl<Register> &MatchInfo) const; /// Match expression trees of the form /// @@ -575,145 +600,148 @@ public: /// /// And check if the tree can be replaced with a M-bit load + possibly a /// bswap. - bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const; - bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); - void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); + bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const; + void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const; - bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); - void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); + bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const; + void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const; bool matchExtractAllEltsFromBuildVector( MachineInstr &MI, - SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo); + SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo) const; void applyExtractAllEltsFromBuildVector( MachineInstr &MI, - SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo); + SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo) const; /// Use a function which takes 
in a MachineIRBuilder to perform a combine. /// By default, it erases the instruction \p MI from the function. - void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo); + void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Use a function which takes in a MachineIRBuilder to perform a combine. /// This variant does not erase \p MI after calling the build function. - void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); + void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const; - bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchFunnelShiftToRotate(MachineInstr &MI); - void applyFunnelShiftToRotate(MachineInstr &MI); - bool matchRotateOutOfRange(MachineInstr &MI); - void applyRotateOutOfRange(MachineInstr &MI); + bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const; + bool matchFunnelShiftToRotate(MachineInstr &MI) const; + void applyFunnelShiftToRotate(MachineInstr &MI) const; + bool matchRotateOutOfRange(MachineInstr &MI) const; + void applyRotateOutOfRange(MachineInstr &MI) const; - bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo); - void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo); + bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; + void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; /// \returns true if a G_ICMP instruction \p MI can be replaced with a true /// or false constant based off of KnownBits information. - bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo); + bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, + int64_t &MatchInfo) const; /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of /// KnownBits information. 
- bool - matchICmpToLHSKnownBits(MachineInstr &MI, - BuildFnTy &MatchInfo); + bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2) - bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const; bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Match: and (lshr x, cst), mask -> ubfx x, cst, width - bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchBitfieldExtractFromAnd(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width - bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchBitfieldExtractFromShr(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Match: shr (and x, n), k -> ubfx x, pos, width - bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, + BuildFnTy &MatchInfo) const; // Helpers for reassociation: bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, - MachineInstr *RHS, BuildFnTy &MatchInfo); + MachineInstr *RHS, + BuildFnTy &MatchInfo) const; /// Reassociate pointer calculations with G_ADD involved, to allow better /// addressing mode usage. - bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Try to reassociate to reassociate operands of a commutative binop. 
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, - Register Op1, BuildFnTy &MatchInfo); + Register Op1, BuildFnTy &MatchInfo) const; /// Reassociate commutative binary operations like G_ADD. - bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Do constant folding when opportunities are exposed after MIR building. - bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo); + bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const; /// Do constant folding when opportunities are exposed after MIR building. - bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo); + bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const; /// Do constant FP folding when opportunities are exposed after MIR building. - bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo); + bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const; /// Constant fold G_FMA/G_FMAD. - bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo); + bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const; /// \returns true if it is possible to narrow the width of a scalar binop /// feeding a G_AND instruction \p MI. - bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Given an G_UDIV \p MI expressing a divide by constant, return an /// expression that implements it by multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". - MachineInstr *buildUDivUsingMul(MachineInstr &MI); + MachineInstr *buildUDivUsingMul(MachineInstr &MI) const; /// Combine G_UDIV by constant into a multiply by magic constant. 
- bool matchUDivByConst(MachineInstr &MI); - void applyUDivByConst(MachineInstr &MI); + bool matchUDivByConst(MachineInstr &MI) const; + void applyUDivByConst(MachineInstr &MI) const; /// Given an G_SDIV \p MI expressing a signed divide by constant, return an /// expression that implements it by multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". - MachineInstr *buildSDivUsingMul(MachineInstr &MI); - bool matchSDivByConst(MachineInstr &MI); - void applySDivByConst(MachineInstr &MI); + MachineInstr *buildSDivUsingMul(MachineInstr &MI) const; + bool matchSDivByConst(MachineInstr &MI) const; + void applySDivByConst(MachineInstr &MI) const; /// Given an G_SDIV \p MI expressing a signed divided by a pow2 constant, /// return expressions that implements it by shifting. - bool matchDivByPow2(MachineInstr &MI, bool IsSigned); - void applySDivByPow2(MachineInstr &MI); + bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const; + void applySDivByPow2(MachineInstr &MI) const; /// Given an G_UDIV \p MI expressing an unsigned divided by a pow2 constant, /// return expressions that implements it by shifting. - void applyUDivByPow2(MachineInstr &MI); + void applyUDivByPow2(MachineInstr &MI) const; // G_UMULH x, (1 << c)) -> x >> (bitwidth - c) - bool matchUMulHToLShr(MachineInstr &MI); - void applyUMulHToLShr(MachineInstr &MI); + bool matchUMulHToLShr(MachineInstr &MI) const; + void applyUMulHToLShr(MachineInstr &MI) const; /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. - bool tryCombine(MachineInstr &MI); + bool tryCombine(MachineInstr &MI) const; /// Emit loads and stores that perform the given memcpy. 
/// Assumes \p MI is a G_MEMCPY_INLINE /// TODO: implement dynamically sized inline memcpy, /// and rename: s/bool tryEmit/void emit/ - bool tryEmitMemcpyInline(MachineInstr &MI); + bool tryEmitMemcpyInline(MachineInstr &MI) const; /// Match: /// (G_UMULO x, 2) -> (G_UADDO x, x) /// (G_SMULO x, 2) -> (G_SADDO x, x) - bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Match: /// (G_*MULO x, 0) -> 0 + no carry out - bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Match: /// (G_*ADDE x, y, 0) -> (G_*ADDO x, y) /// (G_*SUBE x, y, 0) -> (G_*SUBO x, y) - bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Transform (fadd x, fneg(y)) -> (fsub x, y) /// (fadd fneg(x), y) -> (fsub y, x) @@ -722,79 +750,85 @@ public: /// (fdiv fneg(x), fneg(y)) -> (fdiv x, y) /// (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) - bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const; - bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo); - void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo); + bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const; + void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const; bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, - bool CanReassociate = false); + bool CanReassociate = false) const; /// Transform (fadd (fmul x, y), z) -> (fma x, y, z) /// (fadd (fmul x, y), z) -> (fmad x, y, z) - bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Transform (fadd (fpext (fmul x, y)), 
z) -> (fma (fpext x), (fpext y), z) /// (fadd (fpext (fmul x, y)), z) -> (fmad (fpext x), (fpext y), z) bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) /// (fadd (fmad x, y, (fmul u, v)), z) -> (fmad x, y, (fmad u, v, z)) bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; // Transform (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) // (fadd (fmad x, y, (fpext (fmul u, v))), z) // -> (fmad x, y, (fmad (fpext u), (fpext v), z)) - bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, - BuildFnTy &MatchInfo); + bool + matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Transform (fsub (fmul x, y), z) -> (fma x, y, -z) /// (fsub (fmul x, y), z) -> (fmad x, y, -z) - bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) /// (fsub (fneg (fmul, x, y)), z) -> (fmad (fneg x), y, (fneg z)) bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Transform (fsub (fpext (fmul x, y)), z) /// -> (fma (fpext x), (fpext y), (fneg z)) /// (fsub (fpext (fmul x, y)), z) /// -> (fmad (fpext x), (fpext y), (fneg z)) bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Transform (fsub (fpext (fneg (fmul x, y))), z) /// -> (fneg (fma (fpext x), (fpext y), z)) /// (fsub (fpext (fneg (fmul x, y))), z) /// -> (fneg (fmad (fpext x), (fpext y), z)) bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; - bool 
matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info); + bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const; /// Transform G_ADD(x, G_SUB(y, x)) to y. /// Transform G_ADD(G_SUB(y, x), x) to y. - bool matchAddSubSameReg(MachineInstr &MI, Register &Src); + bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const; - bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo); - bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo); - bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo); + bool matchBuildVectorIdentityFold(MachineInstr &MI, + Register &MatchInfo) const; + bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const; + bool matchTruncLshrBuildVectorFold(MachineInstr &MI, + Register &MatchInfo) const; /// Transform: /// (x + y) - y -> x /// (x + y) - x -> y /// x - (y + x) -> 0 - y /// x - (x + z) -> 0 - z - bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// \returns true if it is possible to simplify a select instruction \p MI /// to a min/max instruction of some sort. - bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchSimplifySelectToMinMax(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Transform: /// (X + Y) == X -> Y == 0 @@ -803,144 +837,160 @@ public: /// (X + Y) != X -> Y != 0 /// (X - Y) != X -> Y != 0 /// (X ^ Y) != X -> Y != 0 - bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchRedundantBinOpInEquality(MachineInstr &MI, + BuildFnTy &MatchInfo) const; /// Match shifts greater or equal to the bitwidth of the operation. - bool matchShiftsTooBig(MachineInstr &MI); + bool matchShiftsTooBig(MachineInstr &MI) const; /// Match constant LHS ops that should be commuted. 
- bool matchCommuteConstantToRHS(MachineInstr &MI); + bool matchCommuteConstantToRHS(MachineInstr &MI) const; /// Combine sext of trunc. - bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo) const; /// Combine zext of trunc. - bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo) const; /// Combine zext nneg to sext. - bool matchNonNegZext(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchNonNegZext(const MachineOperand &MO, BuildFnTy &MatchInfo) const; /// Match constant LHS FP ops that should be commuted. - bool matchCommuteFPConstantToRHS(MachineInstr &MI); + bool matchCommuteFPConstantToRHS(MachineInstr &MI) const; // Given a binop \p MI, commute operands 1 and 2. - void applyCommuteBinOpOperands(MachineInstr &MI); + void applyCommuteBinOpOperands(MachineInstr &MI) const; /// Combine select to integer min/max. - bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const; + + /// Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)). + bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Combine selects. - bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Combine ands. - bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Combine ors. - bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// trunc (binop X, C) --> binop (trunc X, trunc C). 
bool matchNarrowBinop(const MachineInstr &TruncMI, - const MachineInstr &BinopMI, BuildFnTy &MatchInfo); + const MachineInstr &BinopMI, + BuildFnTy &MatchInfo) const; - bool matchCastOfInteger(const MachineInstr &CastMI, APInt &MatchInfo); + bool matchCastOfInteger(const MachineInstr &CastMI, APInt &MatchInfo) const; /// Combine addos. - bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Combine extract vector element. - bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Combine extract vector element with a build vector on the vector register. bool matchExtractVectorElementWithBuildVector(const MachineInstr &MI, const MachineInstr &MI2, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Combine extract vector element with a build vector trunc on the vector /// register. - bool matchExtractVectorElementWithBuildVectorTrunc(const MachineOperand &MO, - BuildFnTy &MatchInfo); + bool + matchExtractVectorElementWithBuildVectorTrunc(const MachineOperand &MO, + BuildFnTy &MatchInfo) const; /// Combine extract vector element with a shuffle vector on the vector /// register. bool matchExtractVectorElementWithShuffleVector(const MachineInstr &MI, const MachineInstr &MI2, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; /// Combine extract vector element with a insert vector element on the vector /// register and different indices. 
- bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO, - BuildFnTy &MatchInfo); + bool + matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO, + BuildFnTy &MatchInfo) const; /// Remove references to rhs if it is undef - bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not /// reference a. - bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const; /// Use a function which takes in a MachineIRBuilder to perform a combine. /// By default, it erases the instruction def'd on \p MO from the function. - void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo); + void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const; /// Match FPOWI if it's safe to extend it into a series of multiplications. - bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent); + bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const; /// Expands FPOWI into a series of multiplications and a division if the /// exponent is negative. - void applyExpandFPowI(MachineInstr &MI, int64_t Exponent); + void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const; /// Combine insert vector element OOB. 
- bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchInsertVectorElementOOB(MachineInstr &MI, + BuildFnTy &MatchInfo) const; bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; - bool matchAddOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchAddOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo) const; - bool matchMulOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchMulOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo) const; - bool matchSubOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchSubOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo) const; - bool matchShlOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchShlOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo) const; /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI, - BuildFnTy &MatchInfo); - bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; + bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; - bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; - bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; - bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; // fold ((A-C1)+C2) -> (A+(C2-C1)) - bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo); + 
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; bool matchExtOfExt(const MachineInstr &FirstMI, const MachineInstr &SecondMI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; bool matchCastOfBuildVector(const MachineInstr &CastMI, - const MachineInstr &BVMI, BuildFnTy &MatchInfo); + const MachineInstr &BVMI, + BuildFnTy &MatchInfo) const; - bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchCanonicalizeICmp(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; + bool matchCanonicalizeFCmp(const MachineInstr &MI, + BuildFnTy &MatchInfo) const; // unmerge_values(anyext(build vector)) -> build vector(anyext) bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; // merge_values(_, undef) -> anyext - bool matchMergeXAndUndef(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchMergeXAndUndef(const MachineInstr &MI, BuildFnTy &MatchInfo) const; // merge_values(_, zero) -> zext - bool matchMergeXAndZero(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchMergeXAndZero(const MachineInstr &MI, BuildFnTy &MatchInfo) const; // overflow sub - bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const; private: /// Checks for legality of an indexed variant of \p LdSt. @@ -950,14 +1000,14 @@ private: /// /// \returns true if a candidate is found. bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base, - Register &Offset, bool &RematOffset); + Register &Offset, bool &RematOffset) const; /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a pre-indexing operation. /// /// \returns true if a candidate is found. 
bool findPreIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base, - Register &Offset); + Register &Offset) const; /// Helper function for matchLoadOrCombine. Searches for Registers /// which may have been produced by a load instruction + some arithmetic. @@ -983,12 +1033,12 @@ private: findLoadOffsetsForLoadOrCombine( SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, const SmallVector<Register, 8> &RegsToVisit, - const unsigned MemSizeInBits); + const unsigned MemSizeInBits) const; /// Examines the G_PTR_ADD instruction \p PtrAdd and determines if performing /// a re-association of its operands would break an existing legal addressing /// mode that the address computation currently represents. - bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd); + bool reassociationCanBreakAddressingModePattern(MachineInstr &PtrAdd) const; /// Behavior when a floating point min/max is given one NaN and one /// non-NaN as input. @@ -1031,36 +1081,36 @@ private: /// select (fcmp uge x, 1.0) x, 1.0 -> fmax x, 1.0 /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0 bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, - Register FalseVal, BuildFnTy &MatchInfo); + Register FalseVal, BuildFnTy &MatchInfo) const; /// Try to fold selects to logical operations. 
- bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo); + bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo) const; - bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo) const; - bool isOneOrOneSplat(Register Src, bool AllowUndefs); - bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); + bool isOneOrOneSplat(Register Src, bool AllowUndefs) const; + bool isZeroOrZeroSplat(Register Src, bool AllowUndefs) const; bool isConstantSplatVector(Register Src, int64_t SplatValue, - bool AllowUndefs); + bool AllowUndefs) const; bool isConstantOrConstantVectorI(Register Src) const; - std::optional<APInt> getConstantOrConstantSplatVector(Register Src); + std::optional<APInt> getConstantOrConstantSplatVector(Register Src) const; /// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2) /// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2) /// into a single comparison using range-based reasoning. bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, - BuildFnTy &MatchInfo); + BuildFnTy &MatchInfo) const; // Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y. 
- bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo); + bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const; bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const; bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst, - const GIConstant &RHSCst, BuildFnTy &MatchInfo); + const GIConstant &RHSCst, BuildFnTy &MatchInfo) const; bool constantFoldFCmp(const GFCmp &FCmp, const GFConstant &LHSCst, - const GFConstant &RHSCst, BuildFnTy &MatchInfo); + const GFConstant &RHSCst, BuildFnTy &MatchInfo) const; }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h index 9f8eb030a96c..2c57f2b5aa02 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h @@ -219,12 +219,13 @@ bool GIMatchTableExecutor::executeMatchTable( assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); unsigned Opcode = State.MIs[InsnID]->getOpcode(); - DEBUG_WITH_TYPE(TgtExecutor::getName(), - dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID - << "], ExpectedOpcode=" << Expected0; - if (MatcherOpcode == GIM_CheckOpcodeIsEither) dbgs() - << " || " << Expected1; - dbgs() << ") // Got=" << Opcode << "\n";); + DEBUG_WITH_TYPE(TgtExecutor::getName(), { + dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID + << "], ExpectedOpcode=" << Expected0; + if (MatcherOpcode == GIM_CheckOpcodeIsEither) + dbgs() << " || " << Expected1; + dbgs() << ") // Got=" << Opcode << "\n"; + }); if (Opcode != Expected0 && Opcode != Expected1) { if (handleReject() == RejectAndGiveUp) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 9dea4c1b412d..0d2ff098a15e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ 
b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -70,7 +70,7 @@ public: // aext(trunc x) - > aext/copy/trunc x Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { - LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); if (MRI.getType(DstReg) == MRI.getType(TruncSrc)) replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs, Observer); @@ -112,7 +112,7 @@ public: return true; } } - return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs, Observer); } bool tryCombineZExt(MachineInstr &MI, @@ -136,7 +136,7 @@ public: if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || isConstantUnsupported(DstTy)) return false; - LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); LLT SrcTy = MRI.getType(SrcReg); APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); if (SextSrc && (DstTy != MRI.getType(SextSrc))) @@ -187,7 +187,7 @@ public: return true; } } - return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs, Observer); } bool tryCombineSExt(MachineInstr &MI, @@ -207,7 +207,7 @@ public: LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) return false; - LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); LLT SrcTy = MRI.getType(SrcReg); uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); if (DstTy != MRI.getType(TruncSrc)) @@ -252,7 +252,7 @@ public: } } - return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs, Observer); } bool tryCombineTrunc(MachineInstr &MI, @@ -360,7 +360,7 @@ public: LLT FoundRegTy = MRI.getType(FoundReg); if (DstTy == FoundRegTy) { LLVM_DEBUG(dbgs() << ".. 
Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): " - << MI;); + << MI); replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs, Observer); @@ -376,7 +376,8 @@ public: /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). bool tryFoldImplicitDef(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts, - SmallVectorImpl<Register> &UpdatedDefs) { + SmallVectorImpl<Register> &UpdatedDefs, + GISelObserverWrapper &Observer) { unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || Opcode == TargetOpcode::G_SEXT); @@ -391,16 +392,20 @@ public: // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) return false; - LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); - Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {}); + LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI); + auto Impl = Builder.buildUndef(DstTy); + replaceRegOrBuildCopy(DstReg, Impl.getReg(0), MRI, Builder, UpdatedDefs, + Observer); UpdatedDefs.push_back(DstReg); } else { // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. if (isConstantUnsupported(DstTy)) return false; - LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); - Builder.buildConstant(DstReg, 0); + LLVM_DEBUG(dbgs() << ".. 
Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI); + auto Cnst = Builder.buildConstant(DstTy, 0); + replaceRegOrBuildCopy(DstReg, Cnst.getReg(0), MRI, Builder, UpdatedDefs, + Observer); UpdatedDefs.push_back(DstReg); } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index ea6ed322e9b1..78a92c86b91e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -338,7 +338,7 @@ template <> struct bind_helper<MachineInstr *> { }; template <> struct bind_helper<LLT> { - static bool bind(const MachineRegisterInfo &MRI, LLT Ty, Register Reg) { + static bool bind(const MachineRegisterInfo &MRI, LLT &Ty, Register Reg) { Ty = MRI.getType(Reg); if (Ty.isValid()) return true; @@ -368,10 +368,40 @@ template <typename Class> struct bind_ty { inline bind_ty<Register> m_Reg(Register &R) { return R; } inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; } -inline bind_ty<LLT> m_Type(LLT Ty) { return Ty; } +inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; } inline bind_ty<CmpInst::Predicate> m_Pred(CmpInst::Predicate &P) { return P; } inline operand_type_match m_Pred() { return operand_type_match(); } +template <typename BindTy> struct deferred_helper { + static bool match(const MachineRegisterInfo &MRI, BindTy &VR, BindTy &V) { + return VR == V; + } +}; + +template <> struct deferred_helper<LLT> { + static bool match(const MachineRegisterInfo &MRI, LLT VT, Register R) { + return VT == MRI.getType(R); + } +}; + +template <typename Class> struct deferred_ty { + Class &VR; + + deferred_ty(Class &V) : VR(V) {} + + template <typename ITy> bool match(const MachineRegisterInfo &MRI, ITy &&V) { + return deferred_helper<Class>::match(MRI, VR, V); + } +}; + +/// Similar to m_SpecificReg/Type, but the specific value to match originated +/// from an earlier sub-pattern in the same mi_match expression. 
For example, +/// we cannot match `(add X, X)` with `m_GAdd(m_Reg(X), m_SpecificReg(X))` +/// because `X` is not initialized at the time it's passed to `m_SpecificReg`. +/// Instead, we can use `m_GAdd(m_Reg(X), m_DeferredReg(X))`. +inline deferred_ty<Register> m_DeferredReg(Register &R) { return R; } +inline deferred_ty<LLT> m_DeferredType(LLT &Ty) { return Ty; } + struct ImplicitDefMatch { bool match(const MachineRegisterInfo &MRI, Register Reg) { MachineInstr *TmpMI; @@ -401,8 +431,13 @@ struct BinaryOp_match { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { return (L.match(MRI, TmpMI->getOperand(1).getReg()) && R.match(MRI, TmpMI->getOperand(2).getReg())) || - (Commutable && (R.match(MRI, TmpMI->getOperand(1).getReg()) && - L.match(MRI, TmpMI->getOperand(2).getReg()))); + // NOTE: When trying the alternative operand ordering + // with a commutative operation, it is imperative to always run + // the LHS sub-pattern (i.e. `L`) before the RHS sub-pattern + // (i.e. `R`). Otherwise, m_DeferredReg/Type will not work as + // expected. + (Commutable && (L.match(MRI, TmpMI->getOperand(2).getReg()) && + R.match(MRI, TmpMI->getOperand(1).getReg()))); } } return false; @@ -426,8 +461,13 @@ struct BinaryOpc_match { TmpMI->getNumOperands() == 3) { return (L.match(MRI, TmpMI->getOperand(1).getReg()) && R.match(MRI, TmpMI->getOperand(2).getReg())) || - (Commutable && (R.match(MRI, TmpMI->getOperand(1).getReg()) && - L.match(MRI, TmpMI->getOperand(2).getReg()))); + // NOTE: When trying the alternative operand ordering + // with a commutative operation, it is imperative to always run + // the LHS sub-pattern (i.e. `L`) before the RHS sub-pattern + // (i.e. `R`). Otherwise, m_DeferredReg/Type will not work as + // expected. 
+ (Commutable && (L.match(MRI, TmpMI->getOperand(2).getReg()) && + R.match(MRI, TmpMI->getOperand(1).getReg()))); } } return false; @@ -538,15 +578,27 @@ m_GAShr(const LHS &L, const RHS &R) { } template <typename LHS, typename RHS> -inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, false> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, true> m_GSMax(const LHS &L, const RHS &R) { - return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, false>(L, R); + return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMAX, true>(L, R); } template <typename LHS, typename RHS> -inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, false> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, true> m_GSMin(const LHS &L, const RHS &R) { - return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, false>(L, R); + return BinaryOp_match<LHS, RHS, TargetOpcode::G_SMIN, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_UMAX, true> +m_GUMax(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_UMAX, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_UMIN, true> +m_GUMin(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_UMIN, true>(L, R); } // Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc @@ -662,6 +714,10 @@ struct CompareOp_match { Register RHS = TmpMI->getOperand(3).getReg(); if (L.match(MRI, LHS) && R.match(MRI, RHS)) return true; + // NOTE: When trying the alternative operand ordering + // with a commutative operation, it is imperative to always run + // the LHS sub-pattern (i.e. `L`) before the RHS sub-pattern + // (i.e. `R`). Otherwise, m_DeferredReg/Type will not work as expected. 
if (Commutable && L.match(MRI, RHS) && R.match(MRI, LHS) && P.match(MRI, CmpInst::getSwappedPredicate(TmpPred))) return true; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 37653631cc23..a35ecae5d18b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -171,6 +171,10 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); +/// Returns the inverse opcode of \p MinMaxOpc, which is a generic min/max +/// opcode like G_SMIN. +unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc); + /// If \p VReg is defined by a G_CONSTANT, return the corresponding value. std::optional<APInt> getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI); @@ -522,6 +526,13 @@ std::optional<APInt> isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI); +/// Determines if \p MI defines a float constant integer or a splat vector of +/// float constant integers. +/// \returns the float constant or std::nullopt. +std::optional<APFloat> +isConstantOrConstantSplatVectorFP(MachineInstr &MI, + const MachineRegisterInfo &MRI); + /// Attempt to match a unary predicate against a scalar/splat constant or every /// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source /// value was undef. diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 0b6d155b6d16..604dc9419025 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1490,22 +1490,15 @@ enum NodeType { BUILTIN_OP_END }; -/// FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations -/// which cannot raise FP exceptions should be less than this value. -/// Those that do must not be less than this value. 
-static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400; - -/// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations -/// which do not reference a specific memory location should be less than -/// this value. Those that do must not be less than this value, and can -/// be used with SelectionDAG::getMemIntrinsicNode. -static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500; - /// Whether this is bitwise logic opcode. inline bool isBitwiseLogicOp(unsigned Opcode) { return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR; } +/// Given a \p MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns +/// ISD::(U|S)MAX and ISD::(U|S)MIN, respectively. +NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc); + /// Get underlying scalar opcode for VECREDUCE opcode. /// For example ISD::AND for ISD::VECREDUCE_AND. NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index 486392ca3c49..373f4402dd8d 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -48,7 +48,7 @@ class LiveRegMatrix { unsigned UserTag = 0; // The matrix is represented as a LiveIntervalUnion per register unit. - LiveIntervalUnion::Allocator LIUAlloc; + std::unique_ptr<LiveIntervalUnion::Allocator> LIUAlloc; LiveIntervalUnion::Array Matrix; // Cached queries per register unit. 
@@ -59,15 +59,12 @@ class LiveRegMatrix { unsigned RegMaskVirtReg = 0; BitVector RegMaskUsable; - LiveRegMatrix() = default; + LiveRegMatrix() + : LIUAlloc(std::make_unique<LiveIntervalUnion::Allocator>()) {}; void releaseMemory(); public: - LiveRegMatrix(LiveRegMatrix &&Other) - : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag), - Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)), - RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg), - RegMaskUsable(std::move(Other.RegMaskUsable)) {} + LiveRegMatrix(LiveRegMatrix &&Other) = default; void init(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM); diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h index 2edc2985f0ee..02c640bfc4a9 100644 --- a/llvm/include/llvm/CodeGen/LiveStacks.h +++ b/llvm/include/llvm/CodeGen/LiveStacks.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" #include <cassert> @@ -32,7 +33,7 @@ class raw_ostream; class TargetRegisterClass; class TargetRegisterInfo; -class LiveStacks : public MachineFunctionPass { +class LiveStacks { const TargetRegisterInfo *TRI = nullptr; /// Special pool allocator for VNInfo's (LiveInterval val#). 
@@ -47,12 +48,6 @@ class LiveStacks : public MachineFunctionPass { std::map<int, const TargetRegisterClass *> S2RCMap; public: - static char ID; // Pass identification, replacement for typeid - - LiveStacks() : MachineFunctionPass(ID) { - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - } - using iterator = SS2IntervalMap::iterator; using const_iterator = SS2IntervalMap::const_iterator; @@ -92,6 +87,25 @@ public: VNInfo::Allocator &getVNInfoAllocator() { return VNInfoAllocator; } + void releaseMemory(); + /// init - analysis entry point + void init(MachineFunction &MF); + void print(raw_ostream &O, const Module *M = nullptr) const; +}; + +class LiveStacksWrapperLegacy : public MachineFunctionPass { + LiveStacks Impl; + +public: + static char ID; // Pass identification, replacement for typeid + + LiveStacksWrapperLegacy() : MachineFunctionPass(ID) { + initializeLiveStacksWrapperLegacyPass(*PassRegistry::getPassRegistry()); + } + + LiveStacks &getLS() { return Impl; } + const LiveStacks &getLS() const { return Impl; } + void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; @@ -102,6 +116,24 @@ public: void print(raw_ostream &O, const Module * = nullptr) const override; }; +class LiveStacksAnalysis : public AnalysisInfoMixin<LiveStacksAnalysis> { + static AnalysisKey Key; + friend AnalysisInfoMixin<LiveStacksAnalysis>; + +public: + using Result = LiveStacks; + + LiveStacks run(MachineFunction &MF, MachineFunctionAnalysisManager &); +}; + +class LiveStacksPrinterPass : public PassInfoMixin<LiveStacksPrinterPass> { + raw_ostream &OS; + +public: + LiveStacksPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM); +}; } // end namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index 89d1b5edf3fa..1a8c32b37d55 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ 
b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -156,8 +156,8 @@ private: // Intermediate data structures void HandlePhysRegUse(Register Reg, MachineInstr &MI); void HandlePhysRegDef(Register Reg, MachineInstr *MI, - SmallVectorImpl<unsigned> &Defs); - void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs); + SmallVectorImpl<Register> &Defs); + void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<Register> &Defs); /// FindLastRefOrPartRef - Return the last reference or partial reference of /// the specified register. @@ -167,7 +167,7 @@ private: // Intermediate data structures /// register. Also returns the sub-registers that're defined by the /// instruction. MachineInstr *FindLastPartialDef(Register Reg, - SmallSet<unsigned, 4> &PartDefRegs); + SmallSet<Register, 4> &PartDefRegs); /// analyzePHINodes - Gather information about the PHI nodes in here. In /// particular, we want to map the variable information of a virtual @@ -175,7 +175,7 @@ private: // Intermediate data structures /// is coming from. void analyzePHINodes(const MachineFunction& Fn); - void runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs, + void runOnInstr(MachineInstr &MI, SmallVectorImpl<Register> &Defs, unsigned NumRegs); void runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs); diff --git a/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h index 7d99b7731767..142e5cd4e7ad 100644 --- a/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h +++ b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h @@ -31,7 +31,7 @@ LLT getLLTForType(Type &Ty, const DataLayout &DL); /// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish /// pointers, so these will convert to a plain integer. MVT getMVTForLLT(LLT Ty); -EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx); +EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx); /// Get a rough equivalent of an LLT for a given MVT. 
LLT does not yet support /// scalable vector types, and will assert if used. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 6cf151c951b1..7fe33c3913f2 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -32,6 +32,7 @@ namespace llvm { class BasicBlock; +class MachineDomTreeUpdater; class MachineFunction; class MCSymbol; class ModuleSlotTracker; @@ -972,22 +973,23 @@ public: /// MachineLoopInfo, as applicable. MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, - std::vector<SparseBitVector<>> *LiveInSets = nullptr) { - return SplitCriticalEdge(Succ, &P, nullptr, LiveInSets); + std::vector<SparseBitVector<>> *LiveInSets = nullptr, + MachineDomTreeUpdater *MDTU = nullptr) { + return SplitCriticalEdge(Succ, &P, nullptr, LiveInSets, MDTU); } MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, MachineFunctionAnalysisManager &MFAM, - std::vector<SparseBitVector<>> *LiveInSets = nullptr) { - return SplitCriticalEdge(Succ, nullptr, &MFAM, LiveInSets); + std::vector<SparseBitVector<>> *LiveInSets = nullptr, + MachineDomTreeUpdater *MDTU = nullptr) { + return SplitCriticalEdge(Succ, nullptr, &MFAM, LiveInSets, MDTU); } // Helper method for new pass manager migration. - MachineBasicBlock * - SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P, - MachineFunctionAnalysisManager *MFAM, - std::vector<SparseBitVector<>> *LiveInSets); + MachineBasicBlock *SplitCriticalEdge( + MachineBasicBlock *Succ, Pass *P, MachineFunctionAnalysisManager *MFAM, + std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU); /// Check if the edge between this block and the given successor \p /// Succ, can be split. 
If this returns true a subsequent call to @@ -1375,6 +1377,12 @@ inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(const MachineBasicBlock *BB) { return BB->predecessors(); } +inline auto succ_size(const MachineBasicBlock *BB) { return BB->succ_size(); } +inline auto pred_size(const MachineBasicBlock *BB) { return BB->pred_size(); } +inline auto succ_begin(const MachineBasicBlock *BB) { return BB->succ_begin(); } +inline auto pred_begin(const MachineBasicBlock *BB) { return BB->pred_begin(); } +inline auto succ_end(const MachineBasicBlock *BB) { return BB->succ_end(); } +inline auto pred_end(const MachineBasicBlock *BB) { return BB->pred_end(); } /// MachineInstrSpan provides an interface to get an iteration range /// containing the instruction it was initialized with, along with all diff --git a/llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h b/llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h index 9e3971f0b9fc..fcdc0becf31c 100644 --- a/llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h +++ b/llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h @@ -69,5 +69,12 @@ extern template void GenericDomTreeUpdater<MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>::recalculate(MachineFunction &MF); + +extern template void GenericDomTreeUpdater< + MachineDomTreeUpdater, MachineDominatorTree, + MachinePostDominatorTree>::applyUpdatesImpl</*IsForward=*/true>(); +extern template void GenericDomTreeUpdater< + MachineDomTreeUpdater, MachineDominatorTree, + MachinePostDominatorTree>::applyUpdatesImpl</*IsForward=*/false>(); } // namespace llvm #endif // LLVM_CODEGEN_MACHINEDOMTREEUPDATER_H diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index 74cf94398736..61635ff64502 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -73,86 +73,22 @@ extern template bool Verify<MBBDomTree>(const 
MBBDomTree &DT, /// compute a normal dominator tree. /// class MachineDominatorTree : public DomTreeBase<MachineBasicBlock> { - /// Helper structure used to hold all the basic blocks - /// involved in the split of a critical edge. - struct CriticalEdge { - MachineBasicBlock *FromBB; - MachineBasicBlock *ToBB; - MachineBasicBlock *NewBB; - }; - - /// Pile up all the critical edges to be split. - /// The splitting of a critical edge is local and thus, it is possible - /// to apply several of those changes at the same time. - mutable SmallVector<CriticalEdge, 32> CriticalEdgesToSplit; - - /// Remember all the basic blocks that are inserted during - /// edge splitting. - /// Invariant: NewBBs == all the basic blocks contained in the NewBB - /// field of all the elements of CriticalEdgesToSplit. - /// I.e., forall elt in CriticalEdgesToSplit, it exists BB in NewBBs - /// such as BB == elt.NewBB. - mutable SmallSet<MachineBasicBlock *, 32> NewBBs; - - /// Apply all the recorded critical edges to the DT. - /// This updates the underlying DT information in a way that uses - /// the fast query path of DT as much as possible. - /// FIXME: This method should not be a const member! - /// - /// \post CriticalEdgesToSplit.empty(). - void applySplitCriticalEdges() const; public: using Base = DomTreeBase<MachineBasicBlock>; MachineDominatorTree() = default; - explicit MachineDominatorTree(MachineFunction &MF) { calculate(MF); } + explicit MachineDominatorTree(MachineFunction &MF) { recalculate(MF); } /// Handle invalidation explicitly. bool invalidate(MachineFunction &, const PreservedAnalyses &PA, MachineFunctionAnalysisManager::Invalidator &); - // FIXME: If there is an updater for MachineDominatorTree, - // migrate to this updater and remove these wrappers. 
- - MachineDominatorTree &getBase() { - applySplitCriticalEdges(); - return *this; - } - - MachineBasicBlock *getRoot() const { - applySplitCriticalEdges(); - return Base::getRoot(); - } - - MachineDomTreeNode *getRootNode() const { - applySplitCriticalEdges(); - return const_cast<MachineDomTreeNode *>(Base::getRootNode()); - } - - void calculate(MachineFunction &F); - - bool dominates(const MachineDomTreeNode *A, - const MachineDomTreeNode *B) const { - applySplitCriticalEdges(); - return Base::dominates(A, B); - } - - void getDescendants(MachineBasicBlock *A, - SmallVectorImpl<MachineBasicBlock *> &Result) { - applySplitCriticalEdges(); - Base::getDescendants(A, Result); - } - - bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - applySplitCriticalEdges(); - return Base::dominates(A, B); - } + using Base::dominates; // dominates - Return true if A dominates B. This performs the // special checks necessary if A and B are in the same basic block. bool dominates(const MachineInstr *A, const MachineInstr *B) const { - applySplitCriticalEdges(); const MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent(); if (BBA != BBB) return Base::dominates(BBA, BBB); @@ -164,107 +100,6 @@ public: return &*I == A; } - - bool properlyDominates(const MachineDomTreeNode *A, - const MachineDomTreeNode *B) const { - applySplitCriticalEdges(); - return Base::properlyDominates(A, B); - } - - bool properlyDominates(const MachineBasicBlock *A, - const MachineBasicBlock *B) const { - applySplitCriticalEdges(); - return Base::properlyDominates(A, B); - } - - /// findNearestCommonDominator - Find nearest common dominator basic block - /// for basic block A and B. If there is no such block then return NULL. 
- MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { - applySplitCriticalEdges(); - return Base::findNearestCommonDominator(A, B); - } - - MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { - applySplitCriticalEdges(); - return Base::getNode(BB); - } - - /// getNode - return the (Post)DominatorTree node for the specified basic - /// block. This is the same as using operator[] on this class. - /// - MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { - applySplitCriticalEdges(); - return Base::getNode(BB); - } - - /// addNewBlock - Add a new node to the dominator tree information. This - /// creates a new node as a child of DomBB dominator node,linking it into - /// the children list of the immediate dominator. - MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, - MachineBasicBlock *DomBB) { - applySplitCriticalEdges(); - return Base::addNewBlock(BB, DomBB); - } - - /// changeImmediateDominator - This method is used to update the dominator - /// tree information when a node's immediate dominator changes. - /// - void changeImmediateDominator(MachineBasicBlock *N, - MachineBasicBlock *NewIDom) { - applySplitCriticalEdges(); - Base::changeImmediateDominator(N, NewIDom); - } - - void changeImmediateDominator(MachineDomTreeNode *N, - MachineDomTreeNode *NewIDom) { - applySplitCriticalEdges(); - Base::changeImmediateDominator(N, NewIDom); - } - - /// eraseNode - Removes a node from the dominator tree. Block must not - /// dominate any other blocks. Removes node from its immediate dominator's - /// children list. Deletes dominator node associated with basic block BB. - void eraseNode(MachineBasicBlock *BB) { - applySplitCriticalEdges(); - Base::eraseNode(BB); - } - - /// splitBlock - BB is split and now it has one successor. Update dominator - /// tree to reflect this change. 
- void splitBlock(MachineBasicBlock* NewBB) { - applySplitCriticalEdges(); - Base::splitBlock(NewBB); - } - - /// isReachableFromEntry - Return true if A is dominated by the entry - /// block of the function containing it. - bool isReachableFromEntry(const MachineBasicBlock *A) { - applySplitCriticalEdges(); - return Base::isReachableFromEntry(A); - } - - /// Record that the critical edge (FromBB, ToBB) has been - /// split with NewBB. - /// This is best to use this method instead of directly update the - /// underlying information, because this helps mitigating the - /// number of time the DT information is invalidated. - /// - /// \note Do not use this method with regular edges. - /// - /// \note To benefit from the compile time improvement incurred by this - /// method, the users of this method have to limit the queries to the DT - /// interface between two edges splitting. In other words, they have to - /// pack the splitting of critical edges as much as possible. - void recordSplitCriticalEdge(MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB, - MachineBasicBlock *NewBB) { - bool Inserted = NewBBs.insert(NewBB).second; - (void)Inserted; - assert(Inserted && - "A basic block inserted via edge splitting cannot appear twice"); - CriticalEdgesToSplit.push_back({FromBB, ToBB, NewBB}); - } }; /// \brief Analysis pass which computes a \c MachineDominatorTree. 
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 547cc26eda22..d696add8a1af 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -186,6 +186,7 @@ public: Selected, TiedOpsRewritten, FailsVerification, + FailedRegAlloc, TracksDebugUserValues, LastProperty = TracksDebugUserValues, }; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index ead6bbe1d5f6..1932bb9bd3da 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -555,13 +555,18 @@ public: /// will be dropped. void dropDebugNumber() { DebugInstrNum = 0; } - /// Emit an error referring to the source location of this instruction. - /// This should only be used for inline assembly that is somehow - /// impossible to compile. Other errors should have been handled much - /// earlier. - /// - /// If this method returns, the caller should try to recover from the error. - void emitError(StringRef Msg) const; + /// For inline asm, get the !srcloc metadata node if we have it, and decode + /// the loc cookie from it. + const MDNode *getLocCookieMD() const; + + /// Emit an error referring to the source location of this instruction. This + /// should only be used for inline assembly that is somehow impossible to + /// compile. Other errors should have been handled much earlier. + void emitInlineAsmError(const Twine &ErrMsg) const; + + // Emit an error in the LLVMContext referring to the source location of this + // instruction, if available. + void emitGenericError(const Twine &ErrMsg) const; /// Returns the target instruction descriptor of this MachineInstr. 
const MCInstrDesc &getDesc() const { return *MCID; } diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h b/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h index c1ae3d2d966d..f7a028625ee3 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -83,10 +83,14 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { /// extern_weak symbols. DenseMap<MCSymbol *, const MCExpr *> AuthPtrStubs; + /// HasSignedPersonality is true if the corresponding IR module has the + /// "ptrauth-sign-personality" flag set to 1. + bool HasSignedPersonality = false; + virtual void anchor(); // Out of line virtual method. public: - MachineModuleInfoELF(const MachineModuleInfo &) {} + MachineModuleInfoELF(const MachineModuleInfo &); StubValueTy &getGVStubEntry(MCSymbol *Sym) { assert(Sym && "Key cannot be null"); @@ -105,6 +109,8 @@ public: ExprStubListTy getAuthGVStubList() { return getSortedExprStubs(AuthPtrStubs); } + + bool hasSignedPersonality() const { return HasSignedPersonality; } }; /// MachineModuleInfoCOFF - This is a MachineModuleInfoImpl implementation diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 0cc862590d0c..8e47d0cead75 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -40,6 +40,7 @@ #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H #define LLVM_CODEGEN_MACHINEPIPELINER_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" @@ -114,10 +115,123 @@ private: bool useWindowScheduler(bool Changed); }; +/// Represents a dependence between two instructions. +class SwingSchedulerDDGEdge { + SUnit *Dst = nullptr; + SDep Pred; + unsigned Distance = 0; + +public: + /// Creates an edge corresponding to an edge represented by \p PredOrSucc and + /// \p Dep in the original DAG.
This pair has no information about the + /// direction of the edge, so we need to pass an additional argument \p + /// IsSucc. + SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc) + : Dst(PredOrSucc), Pred(Dep), Distance(0u) { + SUnit *Src = Dep.getSUnit(); + + if (IsSucc) { + std::swap(Src, Dst); + Pred.setSUnit(Src); + } + + // An anti-dependence to PHI means loop-carried dependence. + if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) { + Distance = 1; + std::swap(Src, Dst); + auto Reg = Pred.getReg(); + Pred = SDep(Src, SDep::Kind::Data, Reg); + } + } + + /// Returns the SUnit from which the edge comes (source node). + SUnit *getSrc() const { return Pred.getSUnit(); } + + /// Returns the SUnit to which the edge points (destination node). + SUnit *getDst() const { return Dst; } + + /// Returns the latency value for the edge. + unsigned getLatency() const { return Pred.getLatency(); } + + /// Sets the latency for the edge. + void setLatency(unsigned Latency) { Pred.setLatency(Latency); } + + /// Returns the distance value for the edge. + unsigned getDistance() const { return Distance; } + + /// Sets the distance value for the edge. + void setDistance(unsigned D) { Distance = D; } + + /// Returns the register associated with the edge. + Register getReg() const { return Pred.getReg(); } + + /// Returns true if the edge represents anti dependence. + bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; } + + /// Returns true if the edge represents output dependence. + bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; } + + /// Returns true if the edge represents a dependence that is not data, anti or + /// output dependence. + bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; } + + /// Returns true if the edge represents unknown scheduling barrier. + bool isBarrier() const { return Pred.isBarrier(); } + + /// Returns true if the edge represents an artificial dependence. 
+ bool isArtificial() const { return Pred.isArtificial(); } + + /// Tests if this is a Data dependence that is associated with a register. + bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); } + + /// Returns true for DDG nodes that we ignore when computing the cost + /// functions. We ignore the back-edge recurrence in order to avoid unbounded + /// recursion in the calculation of the ASAP, ALAP, etc functions. + bool ignoreDependence(bool IgnoreAnti) const; +}; + +/// Represents dependencies between instructions. This class is a wrapper of +/// `SUnits` and its dependencies to manipulate back-edges in a natural way. +/// Currently it only supports back-edges via PHI, which are expressed as +/// anti-dependencies in the original DAG. +/// FIXME: Support any other loop-carried dependencies +class SwingSchedulerDDG { + using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>; + + struct SwingSchedulerDDGEdges { + EdgesType Preds; + EdgesType Succs; + }; + + void initEdges(SUnit *SU); + + SUnit *EntrySU; + SUnit *ExitSU; + + std::vector<SwingSchedulerDDGEdges> EdgesVec; + SwingSchedulerDDGEdges EntrySUEdges; + SwingSchedulerDDGEdges ExitSUEdges; + + void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge); + + SwingSchedulerDDGEdges &getEdges(const SUnit *SU); + const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const; + +public: + SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU); + + const EdgesType &getInEdges(const SUnit *SU) const; + + const EdgesType &getOutEdges(const SUnit *SU) const; +}; + /// This class builds the dependence graph for the instructions in a loop, /// and attempts to schedule the instructions using the SMS algorithm. class SwingSchedulerDAG : public ScheduleDAGInstrs { MachinePipeliner &Pass; + + std::unique_ptr<SwingSchedulerDDG> DDG; + /// The minimum initiation interval between iterations for this schedule. 
unsigned MII = 0; /// The maximum initiation interval between iterations for this schedule. @@ -130,7 +244,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { unsigned II_setByPragma = 0; TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; - /// A toplogical ordering of the SUnits, which is needed for changing + /// A topological ordering of the SUnits, which is needed for changing /// dependences and iterating over the SUnits. ScheduleDAGTopologicalSort Topo; @@ -252,27 +366,7 @@ public: return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; } - /// Return true if the dependence is a back-edge in the data dependence graph. - /// Since the DAG doesn't contain cycles, we represent a cycle in the graph - /// using an anti dependence from a Phi to an instruction. - bool isBackedge(SUnit *Source, const SDep &Dep) { - if (Dep.getKind() != SDep::Anti) - return false; - return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); - } - - bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, - bool isSucc = true) const; - - /// The distance function, which indicates that operation V of iteration I - /// depends on operations U of iteration I-distance. - unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { - // Instructions that feed a Phi have a distance of 1. Computing larger - // values for arrays requires data dependence information. 
- if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) - return 1; - return 0; - } + bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const; void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); @@ -294,6 +388,8 @@ public: static bool classof(const ScheduleDAGInstrs *DAG) { return true; } + const SwingSchedulerDDG *getDDG() const { return DDG.get(); } + private: void addLoopCarriedDependences(AAResults *AA); void updatePhiDependences(); @@ -357,6 +453,7 @@ public: // // Hold a map from each SUnit in the circle to the maximum distance from the // source node by only considering the nodes. + const SwingSchedulerDDG *DDG = DAG->getDDG(); DenseMap<SUnit *, unsigned> SUnitToDistance; for (auto *Node : Nodes) SUnitToDistance[Node] = 0; @@ -364,8 +461,8 @@ public: for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) { SUnit *U = Nodes[I - 1]; SUnit *V = Nodes[I % Nodes.size()]; - for (const SDep &Succ : U->Succs) { - SUnit *SuccSUnit = Succ.getSUnit(); + for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) { + SUnit *SuccSUnit = Succ.getDst(); if (V != SuccSUnit) continue; if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) { @@ -377,13 +474,13 @@ public: SUnit *FirstNode = Nodes[0]; SUnit *LastNode = Nodes[Nodes.size() - 1]; - for (auto &PI : LastNode->Preds) { + for (auto &PI : DDG->getInEdges(LastNode)) { // If we have an order dep that is potentially loop carried then a // back-edge exists between the last node and the first node that isn't // modeled in the DAG. Handle it manually by adding 1 to the distance of // the last node. 
- if (PI.getSUnit() != FirstNode || PI.getKind() != SDep::Order || - !DAG->isLoopCarriedDep(LastNode, PI, false)) + if (PI.getSrc() != FirstNode || !PI.isOrderDep() || + !DAG->isLoopCarriedDep(PI)) continue; SUnitToDistance[FirstNode] = std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1); @@ -627,11 +724,13 @@ public: /// Return the cycle of the earliest scheduled instruction in the dependence /// chain. - int earliestCycleInChain(const SDep &Dep); + int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG); /// Return the cycle of the latest scheduled instruction in the dependence /// chain. - int latestCycleInChain(const SDep &Dep); + int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG); void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG); @@ -694,7 +793,7 @@ public: MachineOperand &MO) const; bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, - SwingSchedulerDAG *DAG) const; + const SwingSchedulerDDG *DDG) const; void print(raw_ostream &os) const; void dump() const; }; diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 5dc51aaed81c..5ee3aef28a4f 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -1095,9 +1095,6 @@ public: return !operator==(x); } - /// atEnd - return true if this iterator is equal to reg_end() on the value. - bool atEnd() const { return Op == nullptr; } - // Iterator traversal: forward iteration only defusechain_iterator &operator++() { // Preincrement assert(Op && "Cannot increment end iterator!"); @@ -1203,9 +1200,6 @@ public: return !operator==(x); } - /// atEnd - return true if this iterator is equal to reg_end() on the value. 
- bool atEnd() const { return Op == nullptr; } - // Iterator traversal: forward iteration only defusechain_instr_iterator &operator++() { // Preincrement assert(Op && "Cannot increment end iterator!"); diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h index b70450c19f28..0e4304f69380 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -24,12 +24,6 @@ class MachineInstr; class MachineFunction; class Register; -inline unsigned succ_size(const MachineBasicBlock *BB) { - return BB->succ_size(); -} -inline unsigned pred_size(const MachineBasicBlock *BB) { - return BB->pred_size(); -} inline auto instrs(const MachineBasicBlock &BB) { return BB.instrs(); } template <> struct GenericSSATraits<MachineFunction> { diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 42d132efec2e..3dd62b2ba333 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -99,8 +99,16 @@ namespace llvm { -extern cl::opt<bool> ForceTopDown; -extern cl::opt<bool> ForceBottomUp; +namespace MISched { +enum Direction { + Unspecified, + TopDown, + BottomUp, + Bidirectional, +}; +} // namespace MISched + +extern cl::opt<MISched::Direction> PreRADirection; extern cl::opt<bool> VerifyScheduling; #ifndef NDEBUG extern cl::opt<bool> ViewMISchedDAGs; diff --git a/llvm/include/llvm/CodeGen/SDNodeProperties.td b/llvm/include/llvm/CodeGen/SDNodeProperties.td index 3cb304f47f4b..d32904283a11 100644 --- a/llvm/include/llvm/CodeGen/SDNodeProperties.td +++ b/llvm/include/llvm/CodeGen/SDNodeProperties.td @@ -29,5 +29,3 @@ def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. 
def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand def SDNPVariadic : SDNodeProperty; // Node has variable arguments. -def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match -def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 96667952a16e..d21cc962da46 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -507,6 +507,13 @@ m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F) { return TernaryOpc_match<T0_P, T1_P, T2_P>(ISD::VSELECT, Cond, T, F); } +template <typename T0_P, typename T1_P, typename T2_P> +inline TernaryOpc_match<T0_P, T1_P, T2_P> +m_InsertElt(const T0_P &Vec, const T1_P &Val, const T2_P &Idx) { + return TernaryOpc_match<T0_P, T1_P, T2_P>(ISD::INSERT_VECTOR_ELT, Vec, Val, + Idx); +} + // === Binary operations === template <typename LHS_P, typename RHS_P, bool Commutable = false, bool ExcludeChain = false> @@ -790,6 +797,11 @@ inline BinaryOpc_match<LHS, RHS> m_FRem(const LHS &L, const RHS &R) { return BinaryOpc_match<LHS, RHS>(ISD::FREM, L, R); } +template <typename LHS, typename RHS> +inline BinaryOpc_match<LHS, RHS> m_ExtractElt(const LHS &Vec, const RHS &Idx) { + return BinaryOpc_match<LHS, RHS>(ISD::EXTRACT_VECTOR_ELT, Vec, Idx); +} + // === Unary operations === template <typename Opnd_P, bool ExcludeChain = false> struct UnaryOpc_match { unsigned Opcode; diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 2e3507386df3..ff7caec41855 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -44,6 +44,7 @@ #include <cstdint> #include <functional> #include <map> +#include <set> #include <string> #include <tuple> #include <utility> @@ -247,6 +248,9 @@ class SelectionDAG { BlockFrequencyInfo *BFI = nullptr; MachineModuleInfo *MMI 
= nullptr; + /// Extended EVTs used for single value VTLists. + std::set<EVT, EVT::compareRawBits> EVTs; + /// List of non-single value types. FoldingSet<SDVTListNode> VTListMap; @@ -451,6 +455,9 @@ public: // Maximum depth for recursive analysis such as computeKnownBits, etc. static constexpr unsigned MaxRecursionDepth = 6; + // Returns the maximum steps for SDNode->hasPredecessor() like searches. + static unsigned getHasPredecessorMaxSteps(); + explicit SelectionDAG(const TargetMachine &TM, CodeGenOptLevel); SelectionDAG(const SelectionDAG &) = delete; SelectionDAG &operator=(const SelectionDAG &) = delete; @@ -1323,8 +1330,8 @@ public: /// Creates a MemIntrinsicNode that may produce a /// result and takes a list of operands. Opcode may be INTRINSIC_VOID, - /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not - /// less than FIRST_TARGET_MEMORY_OPCODE. + /// INTRINSIC_W_CHAIN, or a target-specific memory-referencing opcode + /// (see `SelectionDAGTargetInfo::isTargetMemoryOpcode`). SDValue getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 677b59e0c8fb..03899493847b 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -210,7 +210,6 @@ public: inline const SDValue &getOperand(unsigned i) const; inline uint64_t getConstantOperandVal(unsigned i) const; inline const APInt &getConstantOperandAPInt(unsigned i) const; - inline bool isTargetMemoryOpcode() const; inline bool isTargetOpcode() const; inline bool isMachineOpcode() const; inline bool isUndef() const; @@ -310,6 +309,9 @@ public: /// Get the next SDUse in the use list. SDUse *getNext() const { return Next; } + /// Return the operand # of this use in its user.
+ inline unsigned getOperandNo() const; + /// Convenience function for get().getNode(). SDNode *getNode() const { return Val.getNode(); } /// Convenience function for get().getResNo(). @@ -664,7 +666,7 @@ private: DebugLoc debugLoc; /// Return a pointer to the specified value type. - static const EVT *getValueTypeList(EVT VT); + static const EVT *getValueTypeList(MVT VT); /// Index in worklist of DAGCombiner, or negative if the node is not in the /// worklist. -1 = not in worklist; -2 = not in worklist, but has already been @@ -688,22 +690,6 @@ public: /// \<target\>ISD namespace). bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; } - /// Test if this node has a target-specific opcode that may raise - /// FP exceptions (in the \<target\>ISD namespace and greater than - /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory - /// opcode are currently automatically considered to possibly raise - /// FP exceptions as well. - bool isTargetStrictFPOpcode() const { - return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE; - } - - /// Test if this node has a target-specific - /// memory-referencing opcode (in the \<target\>ISD namespace and - /// greater than FIRST_TARGET_MEMORY_OPCODE). - bool isTargetMemoryOpcode() const { - return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; - } - /// Return true if the type of the node type undefined. bool isUndef() const { return NodeType == ISD::UNDEF; } @@ -806,9 +792,6 @@ public: return !operator==(x); } - /// Return true if this iterator is at the end of uses list. - bool atEnd() const { return Op == nullptr; } - // Iterator traversal: forward iteration only. use_iterator &operator++() { // Preincrement assert(Op && "Cannot increment end iterator!"); @@ -821,20 +804,49 @@ public: } /// Retrieve a pointer to the current user node. 
- SDNode *operator*() const { + SDUse &operator*() const { assert(Op && "Cannot dereference end iterator!"); - return Op->getUser(); + return *Op; } - SDNode *operator->() const { return operator*(); } + SDUse *operator->() const { return &operator*(); } + }; - SDUse &getUse() const { return *Op; } + class user_iterator { + friend class SDNode; + use_iterator UI; - /// Retrieve the operand # of this use in its user. - unsigned getOperandNo() const { - assert(Op && "Cannot dereference end iterator!"); - return (unsigned)(Op - Op->getUser()->OperandList); + explicit user_iterator(SDUse *op) : UI(op) {}; + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = SDNode *; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + + user_iterator() = default; + + bool operator==(const user_iterator &x) const { return UI == x.UI; } + bool operator!=(const user_iterator &x) const { return !operator==(x); } + + user_iterator &operator++() { // Preincrement + ++UI; + return *this; + } + + user_iterator operator++(int) { // Postincrement + auto tmp = *this; + ++*this; + return tmp; } + + // Retrieve a pointer to the current User. + SDNode *operator*() const { return UI->getUser(); } + + SDNode *operator->() const { return operator*(); } + + SDUse &getUse() const { return *UI; } }; /// Provide iteration support to walk over all uses of an SDNode. @@ -851,6 +863,18 @@ public: return make_range(use_begin(), use_end()); } + /// Provide iteration support to walk over all users of an SDNode. 
+ user_iterator user_begin() const { return user_iterator(UseList); } + + static user_iterator user_end() { return user_iterator(nullptr); } + + inline iterator_range<user_iterator> users() { + return make_range(user_begin(), user_end()); + } + inline iterator_range<user_iterator> users() const { + return make_range(user_begin(), user_end()); + } + /// Return true if there are exactly NUSES uses of the indicated value. /// This method ignores uses of other values defined by this operation. bool hasNUsesOfValue(unsigned NUses, unsigned Value) const; @@ -1010,9 +1034,9 @@ public: /// If this node has a glue value with a user, return /// the user (there is at most one). Otherwise return NULL. SDNode *getGluedUser() const { - for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) - if (UI.getUse().get().getValueType() == MVT::Glue) - return *UI; + for (SDUse &U : uses()) + if (U.getValueType() == MVT::Glue) + return U.getUser(); return nullptr; } @@ -1124,7 +1148,7 @@ public: void addUse(SDUse &U) { U.addToList(&UseList); } protected: - static SDVTList getSDVTList(EVT VT) { + static SDVTList getSDVTList(MVT VT) { SDVTList Ret = { getValueTypeList(VT), 1 }; return Ret; } @@ -1214,10 +1238,6 @@ inline bool SDValue::isTargetOpcode() const { return Node->isTargetOpcode(); } -inline bool SDValue::isTargetMemoryOpcode() const { - return Node->isTargetMemoryOpcode(); -} - inline bool SDValue::isMachineOpcode() const { return Node->isMachineOpcode(); } @@ -1259,6 +1279,9 @@ inline void SDValue::dumpr(const SelectionDAG *G) const { } // Define inline functions from the SDUse class. +inline unsigned SDUse::getOperandNo() const { + return this - getUser()->op_begin(); +} inline void SDUse::set(const SDValue &V) { if (Val.getNode()) removeFromList(); @@ -1571,10 +1594,10 @@ public: } }; -/// This SDNode is used for target intrinsics that touch -/// memory and need an associated MachineMemOperand. 
Its opcode may be -/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode -/// with a value not less than FIRST_TARGET_MEMORY_OPCODE. +/// This SDNode is used for target intrinsics that touch memory and need +/// an associated MachineMemOperand. Its opcode may be INTRINSIC_VOID, +/// INTRINSIC_W_CHAIN, PREFETCH, or a target-specific memory-referencing +/// opcode (see `SelectionDAGTargetInfo::isTargetMemoryOpcode`). class MemIntrinsicSDNode : public MemSDNode { public: MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h index 720c9463867c..ef5ae5dba58d 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -35,6 +35,19 @@ public: SelectionDAGTargetInfo &operator=(const SelectionDAGTargetInfo &) = delete; virtual ~SelectionDAGTargetInfo(); + /// Returns true if a node with the given target-specific opcode has + /// a memory operand. Nodes with such opcodes can only be created with + /// `SelectionDAG::getMemIntrinsicNode`. + virtual bool isTargetMemoryOpcode(unsigned Opcode) const { return false; } + + /// Returns true if a node with the given target-specific opcode has + /// strict floating-point semantics. + virtual bool isTargetStrictFPOpcode(unsigned Opcode) const { return false; } + + /// Returns true if a node with the given target-specific opcode + /// may raise a floating-point exception. + virtual bool mayRaiseFPException(unsigned Opcode) const; + /// Emit target-specific code that performs a memcpy. 
/// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a207f3886bd0..3751aac4df8e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1720,10 +1720,9 @@ public: return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); } - /// Return the desired alignment for ByVal or InAlloca aggregate function - /// arguments in the caller parameter area. This is the actual alignment, not - /// its logarithm. - virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; + /// Returns the desired alignment for ByVal or InAlloca aggregate function + /// arguments in the caller parameter area. + virtual Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; /// Return the type of registers that this ValueType will eventually require. MVT getRegisterType(MVT VT) const { @@ -2980,10 +2979,9 @@ public: } virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } - virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, - LLVMContext &Ctx) const { - return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx), - getApproximateEVTForLLT(ToTy, DL, Ctx)); + virtual bool isTruncateFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { + return isTruncateFree(getApproximateEVTForLLT(FromTy, Ctx), + getApproximateEVTForLLT(ToTy, Ctx)); } /// Return true if truncating the specific node Val to type VT2 is free. 
@@ -3066,10 +3064,9 @@ public: } virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } - virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, - LLVMContext &Ctx) const { - return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx), - getApproximateEVTForLLT(ToTy, DL, Ctx)); + virtual bool isZExtFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const { + return isZExtFree(getApproximateEVTForLLT(FromTy, Ctx), + getApproximateEVTForLLT(ToTy, Ctx)); } /// Return true if zero-extending the specific node Val to type VT2 is free @@ -4305,6 +4302,12 @@ public: /// @param Level the current DAGCombine legalization level. virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { + SDValue ShiftLHS = N->getOperand(0); + if (!ShiftLHS->hasOneUse()) + return false; + if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND && + !ShiftLHS.getOperand(0)->hasOneUse()) + return false; return true; } diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 8eef45ce565d..a2a9e5d499e5 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -52,7 +52,13 @@ public: void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL, - const MCSymbol *Sym) const override; + const MCSymbol *Sym, + const MachineModuleInfo *MMI) const override; + + virtual void emitPersonalityValueImpl(MCStreamer &Streamer, + const DataLayout &DL, + const MCSymbol *Sym, + const MachineModuleInfo *MMI) const; /// Given a constant with the SectionKind, return a section that it should be /// placed in. 
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 292fa3c94969..f4bf74c8caa5 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -41,12 +41,10 @@ class RegScavenger; class VirtRegMap; class LiveIntervals; class LiveInterval; - class TargetRegisterClass { public: using iterator = const MCPhysReg *; using const_iterator = const MCPhysReg *; - using sc_iterator = const TargetRegisterClass* const *; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; @@ -67,7 +65,8 @@ public: /// Whether a combination of subregisters can cover every register in the /// class. See also the CoveredBySubRegs description in Target.td. const bool CoveredBySubRegs; - const sc_iterator SuperClasses; + const unsigned *SuperClasses; + const uint16_t SuperClassesSize; ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&); /// Return the register class ID number. @@ -175,18 +174,16 @@ public: return SuperRegIndices; } - /// Returns a NULL-terminated list of super-classes. The + /// Returns a list of super-classes. The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. - sc_iterator getSuperClasses() const { - return SuperClasses; + ArrayRef<unsigned> superclasses() const { + return ArrayRef(SuperClasses, SuperClassesSize); } /// Return true if this TargetRegisterClass is a subset /// class of at least one other TargetRegisterClass. - bool isASubClass() const { - return SuperClasses[0] != nullptr; - } + bool isASubClass() const { return SuperClasses != nullptr; } /// Returns the preferred order for allocating registers from this register /// class in MF. 
The raw order comes directly from the .td file and may @@ -350,6 +347,13 @@ public: const TargetRegisterClass *getMinimalPhysRegClass(MCRegister Reg, MVT VT = MVT::Other) const; + /// Returns the common Register Class of two physical registers of the given + /// type, picking the most sub register class of the right type that contains + /// these two physregs. + const TargetRegisterClass * + getCommonMinimalPhysRegClass(MCRegister Reg1, MCRegister Reg2, + MVT VT = MVT::Other) const; + /// Returns the Register Class of a physical register of the given type, /// picking the most sub register class of the right type that contains this /// physreg. If there is no register class compatible with the given type, @@ -357,6 +361,14 @@ public: const TargetRegisterClass *getMinimalPhysRegClassLLT(MCRegister Reg, LLT Ty = LLT()) const; + /// Returns the common Register Class of two physical registers of the given + /// type, picking the most sub register class of the right type that contains + /// these two physregs. If there is no register class compatible with the + /// given type, returns nullptr. + const TargetRegisterClass * + getCommonMinimalPhysRegClassLLT(MCRegister Reg1, MCRegister Reg2, + LLT Ty = LLT()) const; + /// Return the maximal subclass of the given register class that is /// allocatable or NULL. const TargetRegisterClass * diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 23d86248ff87..a94ebf55f6c1 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -63,7 +63,8 @@ class Triple; class TargetSubtargetInfo : public MCSubtargetInfo { protected: // Can only create subclasses... 
TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, - StringRef FS, ArrayRef<SubtargetFeatureKV> PF, + StringRef FS, ArrayRef<StringRef> PN, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, @@ -349,6 +350,8 @@ public: // Conservatively assume such instructions exist by default. return true; } + + virtual bool isRegisterReservedByUser(Register R) const { return false; } }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h index 62ee28cfac99..06879e1f8d15 100644 --- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h +++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h @@ -169,8 +169,7 @@ public: /// vector types. constexpr bool isScalable() const { assert(isVector() && "Expected a vector type"); - return IsPointer ? getFieldValue(PointerVectorScalableFieldInfo) - : getFieldValue(VectorScalableFieldInfo); + return getFieldValue(VectorScalableFieldInfo); } /// Returns true if the LLT is a fixed vector. Returns false otherwise, even @@ -183,9 +182,7 @@ public: constexpr ElementCount getElementCount() const { assert(IsVector && "cannot get number of elements on scalar/aggregate"); - return ElementCount::get(IsPointer - ? 
getFieldValue(PointerVectorElementsFieldInfo) - : getFieldValue(VectorElementsFieldInfo), + return ElementCount::get(getFieldValue(VectorElementsFieldInfo), isScalable()); } @@ -265,25 +262,15 @@ public: } constexpr unsigned getScalarSizeInBits() const { - if (IsScalar) - return getFieldValue(ScalarSizeFieldInfo); - if (IsVector) { - if (!IsPointer) - return getFieldValue(VectorSizeFieldInfo); - else - return getFieldValue(PointerVectorSizeFieldInfo); - } - assert(IsPointer && "unexpected LLT"); - return getFieldValue(PointerSizeFieldInfo); + if (isPointerOrPointerVector()) + return getFieldValue(PointerSizeFieldInfo); + return getFieldValue(ScalarSizeFieldInfo); } constexpr unsigned getAddressSpace() const { - assert(RawData != 0 && "Invalid Type"); - assert(IsPointer && "cannot get address space of non-pointer type"); - if (!IsVector) - return getFieldValue(PointerAddressSpaceFieldInfo); - else - return getFieldValue(PointerVectorAddressSpaceFieldInfo); + assert(isPointerOrPointerVector() && + "cannot get address space of non-pointer type"); + return getFieldValue(PointerAddressSpaceFieldInfo); } /// Returns the vector's element type. Only valid for vector types. @@ -352,44 +339,23 @@ private: /// valid encodings, SizeInBits/SizeOfElement must be larger than 0. 
/// * Non-pointer scalar (isPointer == 0 && isVector == 0): /// SizeInBits: 32; - static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0}; + static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 29}; /// * Pointer (isPointer == 1 && isVector == 0): /// SizeInBits: 16; /// AddressSpace: 24; - static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0}; - static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{ - 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]}; - static_assert((PointerAddressSpaceFieldInfo[0] + - PointerAddressSpaceFieldInfo[1]) <= 61, - "Insufficient bits to encode all data"); + static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 45}; + static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{24, 21}; /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1): /// NumElements: 16; /// SizeOfElement: 32; /// Scalable: 1; - static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0}; - static const constexpr BitFieldInfo VectorSizeFieldInfo{ - 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]}; - static const constexpr BitFieldInfo VectorScalableFieldInfo{ - 1, VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]}; - static_assert((VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]) <= 61, - "Insufficient bits to encode all data"); + static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 5}; + static const constexpr BitFieldInfo VectorScalableFieldInfo{1, 0}; /// * Vector-of-pointer (isPointer == 1 && isVector == 1): /// NumElements: 16; /// SizeOfElement: 16; /// AddressSpace: 24; /// Scalable: 1; - static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0}; - static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{ - 16, - PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]}; - static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{ - 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]}; - static const 
constexpr BitFieldInfo PointerVectorScalableFieldInfo{ - 1, PointerVectorAddressSpaceFieldInfo[0] + - PointerVectorAddressSpaceFieldInfo[1]}; - static_assert((PointerVectorAddressSpaceFieldInfo[0] + - PointerVectorAddressSpaceFieldInfo[1]) <= 61, - "Insufficient bits to encode all data"); uint64_t IsScalar : 1; uint64_t IsPointer : 1; @@ -422,28 +388,16 @@ private: this->IsPointer = IsPointer; this->IsVector = IsVector; this->IsScalar = IsScalar; - if (IsScalar) - RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo); - else if (IsVector) { - assert(EC.isVector() && "invalid number of vector elements"); - if (!IsPointer) - RawData = - maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) | - maskAndShift(SizeInBits, VectorSizeFieldInfo) | - maskAndShift(EC.isScalable() ? 1 : 0, VectorScalableFieldInfo); - else - RawData = - maskAndShift(EC.getKnownMinValue(), - PointerVectorElementsFieldInfo) | - maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) | - maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo) | - maskAndShift(EC.isScalable() ? 1 : 0, - PointerVectorScalableFieldInfo); - } else if (IsPointer) + if (IsPointer) { RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) | maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo); - else - llvm_unreachable("unexpected LLT configuration"); + } else { + RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo); + } + if (IsVector) { + RawData |= maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) | + maskAndShift(EC.isScalable() ? 
1 : 0, VectorScalableFieldInfo); + } } public: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h index 3c936b938650..bd25f6c30ebf 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -9,6 +9,7 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H #define LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index c2365a4c7cf6..7b51bb63cd15 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -68,7 +68,9 @@ public: /// Inserts the address range. If the range overlaps with an existing /// range, the range that it overlaps with will be returned and the two - /// address ranges will be unioned together in "Ranges". + /// address ranges will be unioned together in "Ranges". If a duplicate + /// entry is attempted to be added, the duplicate range will not actually be + /// added and the returned iterator will point to end(). /// /// This is used for finding overlapping ranges in the DW_AT_ranges /// attribute of a DIE. It is also used as a set of address ranges that @@ -77,7 +79,9 @@ public: /// Inserts the address range info. If any of its ranges overlaps with a /// range in an existing range info, the range info is *not* added and an - /// iterator to the overlapping range info. + /// iterator to the overlapping range info. If a duplicate entry is + /// attempted to be added, the duplicate range will not actually be added + /// and the returned iterator will point to end(). /// /// This is used for finding overlapping children of the same DIE. 
die_range_info_iterator insert(const DieRangeInfo &RI); @@ -86,7 +90,7 @@ public: bool contains(const DieRangeInfo &RHS) const; /// Return true if any range in this object intersects with any range in - /// RHS. + /// RHS. Identical ranges are not considered to be intersecting. bool intersects(const DieRangeInfo &RHS) const; }; diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index 198c0ddc2658..d4f39ec0dc28 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -33,14 +33,17 @@ class OutputAggregator; /// allows this class to be unit tested. class DwarfTransformer { public: - /// Create a DWARF transformer. /// /// \param D The DWARF to use when converting to GSYM. /// /// \param G The GSYM creator to populate with the function information /// from the debug info. - DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {} + /// + /// \param LDCS Flag to indicate whether we should load the call site + /// information from DWARF `DW_TAG_call_site` entries + DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false) + : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {} /// Extract the DWARF from the supplied object file and convert it into the /// Gsym format in the GsymCreator object that is passed in. Returns an @@ -83,8 +86,16 @@ private: /// \param Die The DWARF debug info entry to parse. void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die); + /// Parse call site information from DWARF + /// + /// \param CUI The compile unit info for the current CU. + /// \param Die The DWARFDie for the function. + /// \param FI The FunctionInfo for the function being populated. 
+ void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI); + DWARFContext &DICtx; GsymCreator &Gsym; + bool LoadDwarfCallSites; friend class DwarfTransformerTest; }; diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h index 72b7f3e7bfc4..3d532588a702 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -199,7 +199,11 @@ public: /// \param OS The output stream to dump to. /// /// \param CSIC The CallSiteInfoCollection object to dump. - void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC); + /// + /// \param Indent The indentation as number of spaces. Used when dumping as an + /// item from within MergedFunctionsInfo. + void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC, + uint32_t Indent = 0); /// Dump a LineTable object. /// diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 3dfe838bf5fd..7fba3fdc1abc 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2077,17 +2077,23 @@ public: class CallExpr : public Node { const Node *Callee; NodeArray Args; + bool IsParen; // (func)(args ...) ? 
public: - CallExpr(const Node *Callee_, NodeArray Args_, Prec Prec_) - : Node(KCallExpr, Prec_), Callee(Callee_), Args(Args_) {} + CallExpr(const Node *Callee_, NodeArray Args_, bool IsParen_, Prec Prec_) + : Node(KCallExpr, Prec_), Callee(Callee_), Args(Args_), + IsParen(IsParen_) {} template <typename Fn> void match(Fn F) const { - F(Callee, Args, getPrecedence()); + F(Callee, Args, IsParen, getPrecedence()); } void printLeft(OutputBuffer &OB) const override { + if (IsParen) + OB.printOpen(); Callee->print(OB); + if (IsParen) + OB.printClose(); OB.printOpen(); Args.printWithComma(OB); OB.printClose(); @@ -3354,9 +3360,12 @@ const typename AbstractManglingParser< "operator co_await"}, {"az", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Unary, "alignof "}, {"cc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "const_cast"}, - {"cl", OperatorInfo::Call, false, Node::Prec::Postfix, "operator()"}, + {"cl", OperatorInfo::Call, /*Paren*/ false, Node::Prec::Postfix, + "operator()"}, {"cm", OperatorInfo::Binary, false, Node::Prec::Comma, "operator,"}, {"co", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator~"}, + {"cp", OperatorInfo::Call, /*Paren*/ true, Node::Prec::Postfix, + "operator()"}, {"cv", OperatorInfo::CCast, false, Node::Prec::Cast, "operator"}, // C Cast {"dV", OperatorInfo::Binary, false, Node::Prec::Assign, "operator/="}, {"da", OperatorInfo::Del, /*Ary*/ true, Node::Prec::Unary, @@ -5099,6 +5108,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseRequiresExpr() { // ::= <binary operator-name> <expression> <expression> // ::= <ternary operator-name> <expression> <expression> <expression> // ::= cl <expression>+ E # call +// ::= cp <base-unresolved-name> <expression>* E # (name) (expr-list), call that would use argument-dependent lookup but for the parentheses // ::= cv <type> <expression> # conversion with one argument // ::= cv <type> _ <expression>* E # conversion with a different number of arguments // ::= [gs] nw <expression>* _ 
<type> E # new (expr-list) type @@ -5234,7 +5244,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { Names.push_back(E); } return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin), - Op->getPrecedence()); + /*IsParen=*/Op->getFlag(), Op->getPrecedence()); } case OperatorInfo::CCast: { // C Cast: (type)expr @@ -5421,7 +5431,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { } } return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin), - Node::Prec::Postfix); + /*IsParen=*/false, Node::Prec::Postfix); } // Only unresolved names remain. diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h b/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h index 87d3648d37e8..33b661933ace 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h @@ -24,7 +24,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromCOFFObject(MemoryBufferRef ObjectBuffer); +createLinkGraphFromCOFFObject(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// Link the given graph. /// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h index fff32d6d9609..2072ae9dfdbe 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h @@ -23,8 +23,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. 
-Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromCOFFObject_x86_64( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a COFF x86-64 object file. void link_COFF_x86_64(std::unique_ptr<LinkGraph> G, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h index 038591f9add0..3decba65f380 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h @@ -24,7 +24,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer); +createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// Link the given graph. /// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h index 25d1c3aac2c2..b865414e520c 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h @@ -24,8 +24,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. -Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromELFObject_aarch32( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be an ELF arm/thumb object /// file. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h index 50eb598139ea..45a7a0100593 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h @@ -25,8 +25,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. -Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromELFObject_aarch64( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a ELF aarch64 relocatable /// object file. diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h index 44ebd9699461..0752f214d9d5 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h @@ -26,7 +26,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer); +createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a ELF i386 relocatable /// object file. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h index 4d7655c4b988..7e5d0f1f9185 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h @@ -25,8 +25,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. -Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_loongarch(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromELFObject_loongarch( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be an ELF loongarch object /// file. diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h index 8db986a4a9fa..c5049a54cdf1 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h @@ -25,15 +25,16 @@ namespace llvm::jitlink { /// /// WARNING: The big-endian backend has not been tested yet. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer); +createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// Create a LinkGraph from an ELF/ppc64le relocatable object. /// /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. 
-Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_ppc64le(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromELFObject_ppc64le( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a ELF ppc64le object file. /// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h index a0e573baca06..d00b5c2868ba 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h @@ -26,7 +26,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_riscv(MemoryBufferRef ObjectBuffer); +createLinkGraphFromELFObject_riscv(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a ELF riscv object file. void link_ELF_riscv(std::unique_ptr<LinkGraph> G, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h index fbe5765438d2..c4f2c532de74 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -24,7 +24,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer); +createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a ELF x86-64 object file. 
void link_ELF_x86_64(std::unique_ptr<LinkGraph> G, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 2831ebb3be79..df347049e85d 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -23,6 +23,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" @@ -396,12 +397,7 @@ const char *getLinkageName(Linkage L); /// SideEffectsOnly -- Like hidden, but symbol can only be looked up once /// to trigger materialization of the containing graph. /// Local -- Visible only within the LinkGraph. -enum class Scope : uint8_t { - Default, - Hidden, - SideEffectsOnly, - Local -}; +enum class Scope : uint8_t { Default, Hidden, SideEffectsOnly, Local }; /// For debugging output. 
const char *getScopeName(Scope S); @@ -425,10 +421,11 @@ class Symbol { friend class LinkGraph; private: - Symbol(Addressable &Base, orc::ExecutorAddrDiff Offset, StringRef Name, - orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive, - bool IsCallable) - : Name(Name), Base(&Base), Offset(Offset), WeakRef(0), Size(Size) { + Symbol(Addressable &Base, orc::ExecutorAddrDiff Offset, + orc::SymbolStringPtr &&Name, orc::ExecutorAddrDiff Size, Linkage L, + Scope S, bool IsLive, bool IsCallable) + : Name(std::move(Name)), Base(&Base), Offset(Offset), WeakRef(0), + Size(Size) { assert(Offset <= MaxOffset && "Offset out of range"); setLinkage(L); setScope(S); @@ -438,26 +435,29 @@ private: } static Symbol &constructExternal(BumpPtrAllocator &Allocator, - Addressable &Base, StringRef Name, + Addressable &Base, + orc::SymbolStringPtr &&Name, orc::ExecutorAddrDiff Size, Linkage L, bool WeaklyReferenced) { assert(!Base.isDefined() && "Cannot create external symbol from defined block"); - assert(!Name.empty() && "External symbol name cannot be empty"); + assert(Name && "External symbol name cannot be empty"); auto *Sym = Allocator.Allocate<Symbol>(); - new (Sym) Symbol(Base, 0, Name, Size, L, Scope::Default, false, false); + new (Sym) + Symbol(Base, 0, std::move(Name), Size, L, Scope::Default, false, false); Sym->setWeaklyReferenced(WeaklyReferenced); return *Sym; } static Symbol &constructAbsolute(BumpPtrAllocator &Allocator, - Addressable &Base, StringRef Name, + Addressable &Base, + orc::SymbolStringPtr &&Name, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive) { assert(!Base.isDefined() && "Cannot create absolute symbol from a defined block"); auto *Sym = Allocator.Allocate<Symbol>(); - new (Sym) Symbol(Base, 0, Name, Size, L, S, IsLive, false); + new (Sym) Symbol(Base, 0, std::move(Name), Size, L, S, IsLive, false); return *Sym; } @@ -468,20 +468,22 @@ private: assert((Offset + Size) <= Base.getSize() && "Symbol extends past end of block"); auto *Sym = 
Allocator.Allocate<Symbol>(); - new (Sym) Symbol(Base, Offset, StringRef(), Size, Linkage::Strong, - Scope::Local, IsLive, IsCallable); + new (Sym) Symbol(Base, Offset, nullptr, Size, Linkage::Strong, Scope::Local, + IsLive, IsCallable); return *Sym; } static Symbol &constructNamedDef(BumpPtrAllocator &Allocator, Block &Base, - orc::ExecutorAddrDiff Offset, StringRef Name, + orc::ExecutorAddrDiff Offset, + orc::SymbolStringPtr Name, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive, bool IsCallable) { assert((Offset + Size) <= Base.getSize() && "Symbol extends past end of block"); - assert(!Name.empty() && "Name cannot be empty"); + assert(Name && "Name cannot be empty"); auto *Sym = Allocator.Allocate<Symbol>(); - new (Sym) Symbol(Base, Offset, Name, Size, L, S, IsLive, IsCallable); + new (Sym) + Symbol(Base, Offset, std::move(Name), Size, L, S, IsLive, IsCallable); return *Sym; } @@ -498,18 +500,19 @@ public: Symbol &operator=(Symbol &&) = delete; /// Returns true if this symbol has a name. - bool hasName() const { return !Name.empty(); } + bool hasName() const { return Name != nullptr; } /// Returns the name of this symbol (empty if the symbol is anonymous). - StringRef getName() const { - assert((!Name.empty() || getScope() == Scope::Local) && + const orc::SymbolStringPtr &getName() const { + assert((hasName() || getScope() == Scope::Local) && "Anonymous symbol has non-local scope"); + return Name; } /// Rename this symbol. The client is responsible for updating scope and /// linkage if this name-change requires it. - void setName(StringRef Name) { this->Name = Name; } + void setName(const orc::SymbolStringPtr Name) { this->Name = Name; } /// Returns true if this Symbol has content (potentially) defined within this /// object file (i.e. is anything but an external or absolute symbol). @@ -616,7 +619,7 @@ public: /// Set the linkage for this Symbol. 
void setLinkage(Linkage L) { - assert((L == Linkage::Strong || (!Base->isAbsolute() && !Name.empty())) && + assert((L == Linkage::Strong || (!Base->isAbsolute() && Name)) && "Linkage can only be applied to defined named symbols"); this->L = static_cast<uint8_t>(L); } @@ -626,7 +629,7 @@ public: /// Set the visibility for this Symbol. void setScope(Scope S) { - assert((!Name.empty() || S == Scope::Local) && + assert((hasName() || S == Scope::Local) && "Can not set anonymous symbol to non-local scope"); assert((S != Scope::Local || Base->isDefined() || Base->isAbsolute()) && "Invalid visibility for symbol type"); @@ -678,8 +681,7 @@ private: static constexpr uint64_t MaxOffset = (1ULL << 59) - 1; - // FIXME: A char* or SymbolStringPtr may pack better. - StringRef Name; + orc::SymbolStringPtr Name = nullptr; Addressable *Base = nullptr; uint64_t Offset : 57; uint64_t L : 1; @@ -1004,22 +1006,23 @@ public: using GetEdgeKindNameFunction = const char *(*)(Edge::Kind); - LinkGraph(std::string Name, const Triple &TT, SubtargetFeatures Features, - unsigned PointerSize, llvm::endianness Endianness, - GetEdgeKindNameFunction GetEdgeKindName) - : Name(std::move(Name)), TT(TT), Features(std::move(Features)), - PointerSize(PointerSize), Endianness(Endianness), - GetEdgeKindName(std::move(GetEdgeKindName)) {} - - LinkGraph(std::string Name, const Triple &TT, unsigned PointerSize, + LinkGraph(std::string Name, std::shared_ptr<orc::SymbolStringPool> SSP, + const Triple &TT, SubtargetFeatures Features, unsigned PointerSize, llvm::endianness Endianness, GetEdgeKindNameFunction GetEdgeKindName) - : LinkGraph(std::move(Name), TT, SubtargetFeatures(), PointerSize, - Endianness, GetEdgeKindName) {} + : Name(std::move(Name)), SSP(std::move(SSP)), TT(TT), + Features(std::move(Features)), PointerSize(PointerSize), + Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {} - LinkGraph(std::string Name, const Triple &TT, + LinkGraph(std::string Name, 
std::shared_ptr<orc::SymbolStringPool> SSP, + const Triple &TT, unsigned PointerSize, llvm::endianness Endianness, GetEdgeKindNameFunction GetEdgeKindName) - : LinkGraph(std::move(Name), TT, SubtargetFeatures(), + : LinkGraph(std::move(Name), std::move(SSP), TT, SubtargetFeatures(), + PointerSize, Endianness, GetEdgeKindName) {} + + LinkGraph(std::string Name, std::shared_ptr<orc::SymbolStringPool> SSP, + const Triple &TT, GetEdgeKindNameFunction GetEdgeKindName) + : LinkGraph(std::move(Name), std::move(SSP), TT, SubtargetFeatures(), Triple::getArchPointerBitWidth(TT.getArch()) / 8, TT.isLittleEndian() ? endianness::little : endianness::big, GetEdgeKindName) { @@ -1031,6 +1034,7 @@ public: LinkGraph &operator=(const LinkGraph &) = delete; LinkGraph(LinkGraph &&) = delete; LinkGraph &operator=(LinkGraph &&) = delete; + ~LinkGraph(); /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). @@ -1050,6 +1054,8 @@ public: const char *getEdgeKindName(Edge::Kind K) const { return GetEdgeKindName(K); } + std::shared_ptr<orc::SymbolStringPool> getSymbolStringPool() { return SSP; } + /// Allocate a mutable buffer of the given size using the LinkGraph's /// allocator. MutableArrayRef<char> allocateBuffer(size_t Size) { @@ -1263,6 +1269,10 @@ public: return splitBlockImpl(std::move(Blocks), Cache); } + // + orc::SymbolStringPtr intern(StringRef SymbolName) { + return SSP->intern(SymbolName); + } /// Add an external symbol. /// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose /// size is not known, you should substitute '0'. @@ -1271,18 +1281,25 @@ public: /// found or an error will be emitted. Externals that are weakly referenced /// are permitted to be undefined, in which case they are assigned an address /// of 0. 
- Symbol &addExternalSymbol(StringRef Name, orc::ExecutorAddrDiff Size, + Symbol &addExternalSymbol(orc::SymbolStringPtr Name, + orc::ExecutorAddrDiff Size, bool IsWeaklyReferenced) { - assert(!ExternalSymbols.contains(Name) && "Duplicate external symbol"); + assert(!ExternalSymbols.contains(*Name) && "Duplicate external symbol"); auto &Sym = Symbol::constructExternal( - Allocator, createAddressable(orc::ExecutorAddr(), false), Name, Size, - Linkage::Strong, IsWeaklyReferenced); - ExternalSymbols.insert({Sym.getName(), &Sym}); + Allocator, createAddressable(orc::ExecutorAddr(), false), + std::move(Name), Size, Linkage::Strong, IsWeaklyReferenced); + ExternalSymbols.insert({*Sym.getName(), &Sym}); return Sym; } + Symbol &addExternalSymbol(StringRef Name, orc::ExecutorAddrDiff Size, + bool IsWeaklyReferenced) { + return addExternalSymbol(SSP->intern(Name), Size, IsWeaklyReferenced); + } + /// Add an absolute symbol. - Symbol &addAbsoluteSymbol(StringRef Name, orc::ExecutorAddr Address, + Symbol &addAbsoluteSymbol(orc::SymbolStringPtr Name, + orc::ExecutorAddr Address, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive) { assert((S == Scope::Local || llvm::count_if(AbsoluteSymbols, @@ -1291,11 +1308,18 @@ public: }) == 0) && "Duplicate absolute symbol"); auto &Sym = Symbol::constructAbsolute(Allocator, createAddressable(Address), - Name, Size, L, S, IsLive); + std::move(Name), Size, L, S, IsLive); AbsoluteSymbols.insert(&Sym); return Sym; } + Symbol &addAbsoluteSymbol(StringRef Name, orc::ExecutorAddr Address, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsLive) { + + return addAbsoluteSymbol(SSP->intern(Name), Address, Size, L, S, IsLive); + } + /// Add an anonymous symbol. 
Symbol &addAnonymousSymbol(Block &Content, orc::ExecutorAddrDiff Offset, orc::ExecutorAddrDiff Size, bool IsCallable, @@ -1310,13 +1334,22 @@ public: Symbol &addDefinedSymbol(Block &Content, orc::ExecutorAddrDiff Offset, StringRef Name, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsCallable, bool IsLive) { + return addDefinedSymbol(Content, Offset, SSP->intern(Name), Size, L, S, + IsCallable, IsLive); + } + + Symbol &addDefinedSymbol(Block &Content, orc::ExecutorAddrDiff Offset, + orc::SymbolStringPtr Name, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsCallable, bool IsLive) { assert((S == Scope::Local || llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { return Sym->getName() == Name; }) == 0) && "Duplicate defined symbol"); - auto &Sym = Symbol::constructNamedDef(Allocator, Content, Offset, Name, - Size, L, S, IsLive, IsCallable); + auto &Sym = + Symbol::constructNamedDef(Allocator, Content, Offset, std::move(Name), + Size, L, S, IsLive, IsCallable); Content.getSection().addSymbol(Sym); return Sym; } @@ -1401,7 +1434,7 @@ public: Sec.removeSymbol(Sym); Sym.makeExternal(createAddressable(orc::ExecutorAddr(), false)); } - ExternalSymbols.insert({Sym.getName(), &Sym}); + ExternalSymbols.insert({*Sym.getName(), &Sym}); } /// Make the given symbol an absolute with the given address (must not already @@ -1415,10 +1448,10 @@ public: void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) { assert(!Sym.isAbsolute() && "Symbol is already absolute"); if (Sym.isExternal()) { - assert(ExternalSymbols.contains(Sym.getName()) && + assert(ExternalSymbols.contains(*Sym.getName()) && "Sym is not in the absolute symbols set"); assert(Sym.getOffset() == 0 && "External is not at offset 0"); - ExternalSymbols.erase(Sym.getName()); + ExternalSymbols.erase(*Sym.getName()); auto &A = Sym.getAddressable(); A.setAbsolute(true); A.setAddress(Address); @@ -1443,9 +1476,9 @@ public: "Symbol is not in the absolutes set"); AbsoluteSymbols.erase(&Sym); } else { - 
assert(ExternalSymbols.contains(Sym.getName()) && + assert(ExternalSymbols.contains(*Sym.getName()) && "Symbol is not in the externals set"); - ExternalSymbols.erase(Sym.getName()); + ExternalSymbols.erase(*Sym.getName()); } Addressable &OldBase = *Sym.Base; Sym.setBlock(Content); @@ -1530,9 +1563,9 @@ public: void removeExternalSymbol(Symbol &Sym) { assert(!Sym.isDefined() && !Sym.isAbsolute() && "Sym is not an external symbol"); - assert(ExternalSymbols.contains(Sym.getName()) && + assert(ExternalSymbols.contains(*Sym.getName()) && "Symbol is not in the externals set"); - ExternalSymbols.erase(Sym.getName()); + ExternalSymbols.erase(*Sym.getName()); Addressable &Base = *Sym.Base; assert(llvm::none_of(external_symbols(), [&](Symbol *AS) { return AS->Base == &Base; }) && @@ -1603,12 +1636,14 @@ private: BumpPtrAllocator Allocator; std::string Name; + std::shared_ptr<orc::SymbolStringPool> SSP; Triple TT; SubtargetFeatures Features; unsigned PointerSize; llvm::endianness Endianness; GetEdgeKindNameFunction GetEdgeKindName = nullptr; DenseMap<StringRef, std::unique_ptr<Section>> Sections; + // FIXME(jared): these should become dense maps ExternalSymbolMap ExternalSymbols; AbsoluteSymbolSet AbsoluteSymbols; orc::shared::AllocActions AAs; @@ -1831,7 +1866,8 @@ enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol }; raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF); /// A map of symbol names to resolved addresses. -using AsyncLookupResult = DenseMap<StringRef, orc::ExecutorSymbolDef>; +using AsyncLookupResult = + DenseMap<orc::SymbolStringPtr, orc::ExecutorSymbolDef>; /// A function object to call with a resolved symbol map (See AsyncLookupResult) /// or an error if resolution failed. @@ -1864,7 +1900,7 @@ createLookupContinuation(Continuation Cont) { /// Holds context for a single jitLink invocation. 
class JITLinkContext { public: - using LookupMap = DenseMap<StringRef, SymbolLookupFlags>; + using LookupMap = DenseMap<orc::SymbolStringPtr, SymbolLookupFlags>; /// Create a JITLinkContext. JITLinkContext(const JITLinkDylib *JD) : JD(JD) {} @@ -1997,11 +2033,14 @@ void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromObject(MemoryBufferRef ObjectBuffer); +createLinkGraphFromObject(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// Create a \c LinkGraph defining the given absolute symbols. -std::unique_ptr<LinkGraph> absoluteSymbolsLinkGraph(const Triple &TT, - orc::SymbolMap Symbols); +std::unique_ptr<LinkGraph> +absoluteSymbolsLinkGraph(const Triple &TT, + std::shared_ptr<orc::SymbolStringPool> SSP, + orc::SymbolMap Symbols); /// Link the given graph. void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx); diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 75f9f4bbe614..1f8bab24c482 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/AllocationActions.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -320,12 +321,15 @@ public: using OnFinalizedFunction = JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction; - static void Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, - SegmentMap Segments, OnCreatedFunction OnCreated); + static void 
Create(JITLinkMemoryManager &MemMgr, + std::shared_ptr<orc::SymbolStringPool> SSP, + const JITLinkDylib *JD, SegmentMap Segments, + OnCreatedFunction OnCreated); - static Expected<SimpleSegmentAlloc> Create(JITLinkMemoryManager &MemMgr, - const JITLinkDylib *JD, - SegmentMap Segments); + static Expected<SimpleSegmentAlloc> + Create(JITLinkMemoryManager &MemMgr, + std::shared_ptr<orc::SymbolStringPool> SSP, const JITLinkDylib *JD, + SegmentMap Segments); SimpleSegmentAlloc(SimpleSegmentAlloc &&); SimpleSegmentAlloc &operator=(SimpleSegmentAlloc &&); diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h index bb8da0ab9db2..2010b32cdf76 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h @@ -25,7 +25,8 @@ namespace jitlink { /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer); +createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer, + std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given ObjBuffer, which must be a MachO object file. /// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h index 31721bf999ec..31770c2a601e 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -23,8 +23,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. 
-Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromMachOObject_arm64( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given object buffer, which must be a MachO arm64 object file. /// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h index 6aee8c354f91..72e9883ba18b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h @@ -23,8 +23,8 @@ namespace jitlink { /// Note: The graph does not take ownership of the underlying buffer, nor copy /// its contents. The caller is responsible for ensuring that the object buffer /// outlives the graph. -Expected<std::unique_ptr<LinkGraph>> -createLinkGraphFromMachOObject_x86_64(MemoryBufferRef ObjectBuffer); +Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromMachOObject_x86_64( + MemoryBufferRef ObjectBuffer, std::shared_ptr<orc::SymbolStringPool> SSP); /// jit-link the given LinkGraph. 
/// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h index 7ab8ae3e53ce..3d068d97202d 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h @@ -69,7 +69,7 @@ public: private: TableManagerImplT &impl() { return static_cast<TableManagerImplT &>(*this); } - DenseMap<StringRef, Symbol *> Entries; + DenseMap<orc::SymbolStringPtr, Symbol *> Entries; }; } // namespace jitlink diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h index db440c378d24..62221caa71c9 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -755,6 +755,32 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, sizeof(PointerJumpStubContent), true, false); } +/// AArch64 reentry trampoline. +/// +/// Contains the instruction sequence for a trampoline that stores its return +/// address (and stack pointer) on the stack and calls the given reentry symbol: +/// STP x29, x30, [sp, #-16]! +/// BL <reentry-symbol> +extern const char ReentryTrampolineContent[8]; + +/// Create a block of N reentry trampolines. +inline Block &createReentryTrampolineBlock(LinkGraph &G, + Section &TrampolineSection, + Symbol &ReentrySymbol) { + auto &B = G.createContentBlock(TrampolineSection, ReentryTrampolineContent, + orc::ExecutorAddr(~uint64_t(7)), 4, 0); + B.addEdge(Branch26PCRel, 4, ReentrySymbol, 0); + return B; +} + +inline Symbol &createAnonymousReentryTrampoline(LinkGraph &G, + Section &TrampolineSection, + Symbol &ReentrySymbol) { + return G.addAnonymousSymbol( + createReentryTrampolineBlock(G, TrampolineSection, ReentrySymbol), 0, + sizeof(ReentryTrampolineContent), true, false); +} + /// Global Offset Table Builder. 
class GOTTableManager : public TableManager<GOTTableManager> { public: diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h b/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h index 39a7db32258c..d31c749bad1b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h @@ -41,6 +41,50 @@ enum EdgeKind_loongarch : Edge::Kind { /// Pointer32, + /// A 16-bit PC-relative branch. + /// + /// Represents a PC-relative branch to a target within +/-128Kb. The target + /// must be 4-byte aligned. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 2 : int16 + /// + /// Notes: + /// The '16' in the name refers to the number operand bits and follows the + /// naming convention used by the corresponding ELF relocations. Since the low + /// two bits must be zero (because of the 4-byte alignment of the target) the + /// operand is effectively a signed 18-bit number. + /// + /// Errors: + /// - The result of the unshifted part of the fixup expression must be + /// 4-byte aligned otherwise an alignment error will be returned. + /// - The result of the fixup expression must fit into an int16 otherwise an + /// out-of-range error will be returned. + /// + Branch16PCRel, + + /// A 21-bit PC-relative branch. + /// + /// Represents a PC-relative branch to a target within +/-4Mb. The Target must + /// be 4-byte aligned. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 2 : int21 + /// + /// Notes: + /// The '21' in the name refers to the number operand bits and follows the + /// naming convention used by the corresponding ELF relocations. Since the low + /// two bits must be zero (because of the 4-byte alignment of the target) the + /// operand is effectively a signed 23-bit number. + /// + /// Errors: + /// - The result of the unshifted part of the fixup expression must be + /// 4-byte aligned otherwise an alignment error will be returned. 
+ /// - The result of the fixup expression must fit into an int21 otherwise an + /// out-of-range error will be returned. + /// + Branch21PCRel, + /// A 26-bit PC-relative branch. /// /// Represents a PC-relative call or branch to a target within +/-128Mb. The @@ -213,6 +257,37 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { *(ulittle32_t *)FixupPtr = Value; break; } + case Branch16PCRel: { + int64_t Value = TargetAddress - FixupAddress + Addend; + + if (!isInt<18>(Value)) + return makeTargetOutOfRangeError(G, B, E); + + if (!isShiftedInt<16, 2>(Value)) + return makeAlignmentError(orc::ExecutorAddr(FixupAddress), Value, 4, E); + + uint32_t RawInstr = *(little32_t *)FixupPtr; + uint32_t Imm = static_cast<uint32_t>(Value >> 2); + uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10; + *(little32_t *)FixupPtr = RawInstr | Imm15_0; + break; + } + case Branch21PCRel: { + int64_t Value = TargetAddress - FixupAddress + Addend; + + if (!isInt<23>(Value)) + return makeTargetOutOfRangeError(G, B, E); + + if (!isShiftedInt<21, 2>(Value)) + return makeAlignmentError(orc::ExecutorAddr(FixupAddress), Value, 4, E); + + uint32_t RawInstr = *(little32_t *)FixupPtr; + uint32_t Imm = static_cast<uint32_t>(Value >> 2); + uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10; + uint32_t Imm20_16 = extractBits(Imm, /*Hi=*/20, /*Lo=*/16); + *(little32_t *)FixupPtr = RawInstr | Imm15_0 | Imm20_16; + break; + } case Branch26PCRel: { int64_t Value = TargetAddress - FixupAddress + Addend; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 0d7e0fdb5820..356b8cd70aec 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -641,6 +641,31 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, false); } +/// x86-64 reentry trampoline. 
+/// +/// Contains the instruction sequence for a trampoline that stores its return +/// address on the stack and calls <reentry-symbol>: +/// call <reentry-symbol> +extern const char ReentryTrampolineContent[5]; + +/// Create a block of N reentry trampolines. +inline Block &createReentryTrampolineBlock(LinkGraph &G, + Section &TrampolineSection, + Symbol &ReentrySymbol) { + auto &B = G.createContentBlock(TrampolineSection, ReentryTrampolineContent, + orc::ExecutorAddr(~uint64_t(7)), 1, 0); + B.addEdge(BranchPCRel32, 1, ReentrySymbol, 0); + return B; +} + +inline Symbol &createAnonymousReentryTrampoline(LinkGraph &G, + Section &TrampolineSection, + Symbol &ReentrySymbol) { + return G.addAnonymousSymbol( + createReentryTrampolineBlock(G, TrampolineSection, ReentrySymbol), 0, + sizeof(ReentryTrampolineContent), true, false); +} + /// Global Offset Table Builder. class GOTTableManager : public TableManager<GOTTableManager> { public: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h index f44b6b3860fc..cebab0f2b30a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h @@ -22,6 +22,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include <future> +#include <list> #include <memory> #include <thread> #include <vector> diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index e892005c53d8..2788932ca4bc 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -51,6 +51,26 @@ enum class SymbolState : uint8_t; using ResourceTrackerSP = IntrusiveRefCntPtr<ResourceTracker>; using JITDylibSP = IntrusiveRefCntPtr<JITDylib>; +/// A definition of a Symbol within a JITDylib. 
+class SymbolInstance { +public: + using LookupAsyncOnCompleteFn = + unique_function<void(Expected<ExecutorSymbolDef>)>; + + SymbolInstance(JITDylibSP JD, SymbolStringPtr Name) + : JD(std::move(JD)), Name(std::move(Name)) {} + + const JITDylib &getJITDylib() const { return *JD; } + const SymbolStringPtr &getName() const { return Name; } + + Expected<ExecutorSymbolDef> lookup() const; + void lookupAsync(LookupAsyncOnCompleteFn OnComplete) const; + +private: + JITDylibSP JD; + SymbolStringPtr Name; +}; + using ResourceKey = uintptr_t; /// API to remove / transfer ownership of JIT resources. @@ -105,7 +125,14 @@ private: class ResourceManager { public: virtual ~ResourceManager(); + + /// This function will be called *outside* the session lock. ResourceManagers + /// should perform book-keeping under the session lock, and any expensive + /// cleanup outside the session lock. virtual Error handleRemoveResources(JITDylib &JD, ResourceKey K) = 0; + + /// This function will be called *inside* the session lock. ResourceManagers + /// DO NOT need to re-lock the session. virtual void handleTransferResources(JITDylib &JD, ResourceKey DstK, ResourceKey SrcK) = 0; }; @@ -173,6 +200,11 @@ public: SymbolLookupSet() = default; + SymbolLookupSet(std::initializer_list<value_type> Elems) { + for (auto &E : Elems) + Symbols.push_back(std::move(E)); + } + explicit SymbolLookupSet( SymbolStringPtr Name, SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { @@ -550,6 +582,9 @@ public: /// emitted or notified of an error. ~MaterializationResponsibility(); + /// Return the ResourceTracker associated with this instance. + const ResourceTrackerSP &getResourceTracker() const { return RT; } + /// Runs the given callback under the session lock, passing in the associated /// ResourceKey. This is the safe way to associate resources with trackers. 
template <typename Func> Error withResourceKeyDo(Func &&F) const { @@ -1748,6 +1783,10 @@ private: JITDispatchHandlers; }; +inline Expected<ExecutorSymbolDef> SymbolInstance::lookup() const { + return JD->getExecutionSession().lookup({JD.get()}, Name); +} + template <typename Func> Error ResourceTracker::withResourceKeyDo(Func &&F) { return getJITDylib().getExecutionSession().runSessionLocked([&]() -> Error { if (isDefunct()) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h index 54442c91096b..f19cfce16d4e 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -138,6 +138,11 @@ public: static ArrayRef<std::pair<const char *, const char *>> standardRuntimeUtilityAliases(); + /// Returns a list of aliases required to enable lazy compilation via the + /// ORC runtime. + static ArrayRef<std::pair<const char *, const char *>> + standardLazyCompilationAliases(); + private: // Data needed for bootstrap only. 
struct BootstrapInfo { @@ -151,6 +156,7 @@ private: RuntimeFunction *func1, RuntimeFunction *func2, const shared::WrapperFunctionCall::ArgDataBufferType &arg1, const shared::WrapperFunctionCall::ArgDataBufferType &arg2) { + std::lock_guard<std::mutex> Lock(Mutex); auto &argList = DeferredRTFnMap[std::make_pair(func1, func2)]; argList.emplace_back(arg1, arg2); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index 5d5326c4a469..dcf5592f1717 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -181,7 +181,7 @@ public: ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D) - : SSP(std::move(SSP)), D(std::move(D)) {} + : SSP(std::move(SSP)), D(std::move(D)) {} virtual ~ExecutorProcessControl(); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkLazyCallThroughManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkLazyCallThroughManager.h new file mode 100644 index 000000000000..19075c76a607 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkLazyCallThroughManager.h @@ -0,0 +1,26 @@ +//===- JITLinkLazyCallThroughManager.h - JITLink based laziness -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Redirectable Symbol Manager implementation using JITLink +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKLAZYCALLTHROUGHMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_JITLINKLAZYCALLTHROUGHMANAGER_H + +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RedirectionManager.h" +#include "llvm/Support/StringSaver.h" + +#include <atomic> + +namespace llvm { +namespace orc {} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_JITLINKLAZYCALLTHROUGHMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h index 81d1d154d560..83339e56cfa5 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h @@ -39,12 +39,6 @@ public: ObjLinkingLayer, AnonymousPtrCreator, PtrJumpStubCreator)); } - void emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> R, - SymbolMap InitialDests) override; - - Error redirect(JITDylib &JD, const SymbolMap &NewDests) override; - -private: JITLinkRedirectableSymbolManager( ObjectLinkingLayer &ObjLinkingLayer, jitlink::AnonymousPointerCreator &AnonymousPtrCreator, @@ -53,6 +47,14 @@ private: AnonymousPtrCreator(std::move(AnonymousPtrCreator)), PtrJumpStubCreator(std::move(PtrJumpStubCreator)) {} + ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } + + void emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> R, + SymbolMap InitialDests) override; + + Error redirect(JITDylib &JD, const SymbolMap &NewDests) override; + +private: ObjectLinkingLayer &ObjLinkingLayer; 
jitlink::AnonymousPointerCreator AnonymousPtrCreator; jitlink::PointerJumpStubCreator PtrJumpStubCreator; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkReentryTrampolines.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkReentryTrampolines.h new file mode 100644 index 000000000000..673019b748b3 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkReentryTrampolines.h @@ -0,0 +1,72 @@ +//===- JITLinkReentryTrampolines.h -- JITLink-based trampolines -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emit reentry trampolines via JITLink. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREENTRYTRAMPOLINES_H +#define LLVM_EXECUTIONENGINE_ORC_JITLINKREENTRYTRAMPOLINES_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/LazyReexports.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" +#include "llvm/Support/Error.h" + +namespace llvm::jitlink { +class Block; +class LinkGraph; +class Section; +class Symbol; +} // namespace llvm::jitlink + +namespace llvm::orc { + +class ObjectLinkingLayer; +class RedirectableSymbolManager; + +/// Produces trampolines on request using JITLink. +class JITLinkReentryTrampolines { +public: + using EmitTrampolineFn = unique_function<jitlink::Symbol &( + jitlink::LinkGraph &G, jitlink::Section &Sec, + jitlink::Symbol &ReentrySym)>; + using OnTrampolinesReadyFn = unique_function<void( + Expected<std::vector<ExecutorSymbolDef>> EntryAddrs)>; + + /// Create trampolines using the default reentry trampoline function for + /// the session triple. 
+ static Expected<std::unique_ptr<JITLinkReentryTrampolines>> + Create(ObjectLinkingLayer &ObjLinkingLayer); + + JITLinkReentryTrampolines(ObjectLinkingLayer &ObjLinkingLayer, + EmitTrampolineFn EmitTrampoline); + JITLinkReentryTrampolines(JITLinkReentryTrampolines &&) = delete; + JITLinkReentryTrampolines &operator=(JITLinkReentryTrampolines &&) = delete; + + void emit(ResourceTrackerSP RT, size_t NumTrampolines, + OnTrampolinesReadyFn OnTrampolinesReady); + +private: + class TrampolineAddrScraperPlugin; + + ObjectLinkingLayer &ObjLinkingLayer; + TrampolineAddrScraperPlugin *TrampolineAddrScraper = nullptr; + EmitTrampolineFn EmitTrampoline; + std::atomic<size_t> ReentryGraphIdx{0}; +}; + +Expected<std::unique_ptr<LazyReexportsManager>> +createJITLinkLazyReexportsManager(ObjectLinkingLayer &ObjLinkingLayer, + RedirectableSymbolManager &RSMgr, + JITDylib &PlatformJD); + +} // namespace llvm::orc + +#endif // LLVM_EXECUTIONENGINE_ORC_JITLINKREENTRYTRAMPOLINES_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyObjectLinkingLayer.h index 800f6773f16f..8a0350b59d39 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyObjectLinkingLayer.h @@ -1,4 +1,4 @@ -//===- RedirectionManager.h - Redirection manager interface -----*- C++ -*-===// +//===- LazyObjectLinkingLayer.h - Link objects on first fn call -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Redirection manager interface that redirects a call to symbol to another. +// Link object files lazily on first call. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_EXECUTIONENGINE_ORC_LAZYOBJECTLINKINGLAYER_H @@ -18,18 +18,30 @@ namespace llvm::orc { class ObjectLinkingLayer; -class LazyCallThroughManager; +class LazyReexportsManager; class RedirectableSymbolManager; +/// LazyObjectLinkingLayer is an adapter for ObjectLinkingLayer that builds +/// lazy reexports for all function symbols in objects that are added to defer +/// linking until the first call to a function defined in the object. +/// +/// Linking is performed by emitting the object file via the base +/// ObjectLinkingLayer. +/// +/// No partitioning is performed: The first call to any function in the object +/// will trigger linking of the whole object. +/// +/// References to data symbols are not lazy and will trigger immediate linking +/// (same as ObjectLinkingLayer). class LazyObjectLinkingLayer : public ObjectLayer { public: LazyObjectLinkingLayer(ObjectLinkingLayer &BaseLayer, - LazyCallThroughManager &LCTMgr, - RedirectableSymbolManager &RSMgr); + LazyReexportsManager &LRMgr); + /// Add an object file to the JITDylib targeted by the given tracker. 
llvm::Error add(llvm::orc::ResourceTrackerSP RT, - std::unique_ptr<llvm::MemoryBuffer> O, - llvm::orc::MaterializationUnit::Interface I) override; + std::unique_ptr<MemoryBuffer> O, + MaterializationUnit::Interface I) override; void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) override; @@ -38,8 +50,7 @@ private: class RenamerPlugin; ObjectLinkingLayer &BaseLayer; - LazyCallThroughManager &LCTMgr; - RedirectableSymbolManager &RSMgr; + LazyReexportsManager &LRMgr; }; } // namespace llvm::orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 6a43cb6fb6ca..c6b5d08544b1 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -173,6 +173,75 @@ lazyReexports(LazyCallThroughManager &LCTManager, LCTManager, RSManager, SourceJD, std::move(CallableAliases), SrcJDLoc); } +class LazyReexportsManager : public ResourceManager { + + friend std::unique_ptr<MaterializationUnit> + lazyReexports(LazyReexportsManager &, SymbolAliasMap); + +public: + using OnTrampolinesReadyFn = unique_function<void( + Expected<std::vector<ExecutorSymbolDef>> EntryAddrs)>; + using EmitTrampolinesFn = + unique_function<void(ResourceTrackerSP RT, size_t NumTrampolines, + OnTrampolinesReadyFn OnTrampolinesReady)>; + + /// Create a LazyReexportsManager that uses the ORC runtime for reentry. + /// This will work both in-process and out-of-process. 
+ static Expected<std::unique_ptr<LazyReexportsManager>> + Create(EmitTrampolinesFn EmitTrampolines, RedirectableSymbolManager &RSMgr, + JITDylib &PlatformJD); + + LazyReexportsManager(LazyReexportsManager &&) = delete; + LazyReexportsManager &operator=(LazyReexportsManager &&) = delete; + + Error handleRemoveResources(JITDylib &JD, ResourceKey K) override; + void handleTransferResources(JITDylib &JD, ResourceKey DstK, + ResourceKey SrcK) override; + +private: + struct CallThroughInfo { + SymbolStringPtr Name; + SymbolStringPtr BodyName; + JITDylibSP JD; + }; + + class MU; + class Plugin; + + using ResolveSendResultFn = + unique_function<void(Expected<ExecutorSymbolDef>)>; + + LazyReexportsManager(EmitTrampolinesFn EmitTrampolines, + RedirectableSymbolManager &RSMgr, JITDylib &PlatformJD, + Error &Err); + + std::unique_ptr<MaterializationUnit> + createLazyReexports(SymbolAliasMap Reexports); + + void emitReentryTrampolines(std::unique_ptr<MaterializationResponsibility> MR, + SymbolAliasMap Reexports); + void emitRedirectableSymbols( + std::unique_ptr<MaterializationResponsibility> MR, + SymbolAliasMap Reexports, + Expected<std::vector<ExecutorSymbolDef>> ReentryPoints); + void resolve(ResolveSendResultFn SendResult, ExecutorAddr ReentryStubAddr); + + ExecutionSession &ES; + EmitTrampolinesFn EmitTrampolines; + RedirectableSymbolManager &RSMgr; + + DenseMap<ResourceKey, std::vector<ExecutorAddr>> KeyToReentryAddrs; + DenseMap<ExecutorAddr, CallThroughInfo> CallThroughs; +}; + +/// Define lazy-reexports based on the given SymbolAliasMap. Each lazy re-export +/// is a callable symbol that will look up and dispatch to the given aliasee on +/// first call. All subsequent calls will go directly to the aliasee. 
+inline std::unique_ptr<MaterializationUnit> +lazyReexports(LazyReexportsManager &LRM, SymbolAliasMap Reexports) { + return LRM.createLazyReexports(std::move(Reexports)); +} + } // End namespace orc } // End namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLayer.h new file mode 100644 index 000000000000..771a118efae2 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLayer.h @@ -0,0 +1,110 @@ +//===- LinkGraphLayer.h - Add LinkGraphs to an ExecutionSession -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LinkGraphLayer and associated utilities. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLAYER_H +#define LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLAYER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +#include <atomic> +#include <memory> + +namespace llvm::orc { + +class LinkGraphLayer { +public: + LinkGraphLayer(ExecutionSession &ES) : ES(ES) {} + + virtual ~LinkGraphLayer(); + + ExecutionSession &getExecutionSession() { return ES; } + + /// Adds a LinkGraph to the JITDylib for the given ResourceTracker. + virtual Error add(ResourceTrackerSP RT, std::unique_ptr<jitlink::LinkGraph> G, + MaterializationUnit::Interface I); + + /// Adds a LinkGraph to the JITDylib for the given ResourceTracker. The + /// interface for the graph will be built using getLinkGraphInterface. 
+ Error add(ResourceTrackerSP RT, std::unique_ptr<jitlink::LinkGraph> G) { + auto LGI = getInterface(*G); + return add(std::move(RT), std::move(G), std::move(LGI)); + } + + /// Adds a LinkGraph to the given JITDylib. + Error add(JITDylib &JD, std::unique_ptr<jitlink::LinkGraph> G, + MaterializationUnit::Interface I) { + return add(JD.getDefaultResourceTracker(), std::move(G), std::move(I)); + } + + /// Adds a LinkGraph to the given JITDylib. The interface for the object will + /// be built using getLinkGraphInterface. + Error add(JITDylib &JD, std::unique_ptr<jitlink::LinkGraph> G) { + return add(JD.getDefaultResourceTracker(), std::move(G)); + } + + /// Emit should materialize the given LinkGraph. + virtual void emit(std::unique_ptr<MaterializationResponsibility> R, + std::unique_ptr<jitlink::LinkGraph> G) = 0; + + /// Get the interface for the given LinkGraph. + MaterializationUnit::Interface getInterface(jitlink::LinkGraph &G); + + /// Get the JITSymbolFlags for the given symbol. + static JITSymbolFlags getJITSymbolFlagsForSymbol(jitlink::Symbol &Sym); + +private: + ExecutionSession &ES; + std::atomic<uint64_t> Counter{0}; +}; + +/// MaterializationUnit for wrapping LinkGraphs. 
+class LinkGraphMaterializationUnit : public MaterializationUnit { +public: + LinkGraphMaterializationUnit(LinkGraphLayer &LGLayer, + std::unique_ptr<jitlink::LinkGraph> G, + Interface I) + : MaterializationUnit(I), LGLayer(LGLayer), G(std::move(G)) {} + + LinkGraphMaterializationUnit(LinkGraphLayer &LGLayer, + std::unique_ptr<jitlink::LinkGraph> G) + : MaterializationUnit(LGLayer.getInterface(*G)), LGLayer(LGLayer), + G(std::move(G)) {} + + StringRef getName() const override; + + void materialize(std::unique_ptr<MaterializationResponsibility> MR) override { + LGLayer.emit(std::move(MR), std::move(G)); + } + +private: + void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; + + LinkGraphLayer &LGLayer; + std::unique_ptr<jitlink::LinkGraph> G; +}; + +inline Error LinkGraphLayer::add(ResourceTrackerSP RT, + std::unique_ptr<jitlink::LinkGraph> G, + MaterializationUnit::Interface I) { + auto &JD = RT->getJITDylib(); + + return JD.define(std::make_unique<LinkGraphMaterializationUnit>( + *this, std::move(G), std::move(I)), + std::move(RT)); +} + +} // end namespace llvm::orc + +#endif // LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLAYER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLinkingLayer.h new file mode 100644 index 000000000000..3375bd9e4e2e --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/LinkGraphLinkingLayer.h @@ -0,0 +1,201 @@ +//===-- LinkGraphLinkingLayer.h - Link LinkGraphs with JITLink --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LinkGraphLinkingLayer and associated utilities. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLINKINGLAYER_H +#define LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLINKINGLAYER_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Layer.h" +#include "llvm/ExecutionEngine/Orc/LinkGraphLayer.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cassert> +#include <functional> +#include <memory> +#include <mutex> +#include <utility> +#include <vector> + +namespace llvm { + +namespace jitlink { +class EHFrameRegistrar; +} // namespace jitlink + +namespace orc { + +/// LinkGraphLinkingLayer links LinkGraphs into the Executor using JITLink. +/// +/// Clients can use this class to add LinkGraphs to an ExecutionSession, and it +/// serves as a base for the ObjectLinkingLayer that can link object files. +class LinkGraphLinkingLayer : public LinkGraphLayer, private ResourceManager { + class JITLinkCtx; + +public: + /// Plugin instances can be added to the ObjectLinkingLayer to receive + /// callbacks when code is loaded or emitted, and when JITLink is being + /// configured. + class Plugin { + public: + virtual ~Plugin(); + virtual void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) {} + + // Deprecated. Don't use this in new code. There will be a proper mechanism + // for capturing object buffers. 
+ virtual void notifyMaterializing(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::JITLinkContext &Ctx, + MemoryBufferRef InputObject) {} + + virtual void notifyLoaded(MaterializationResponsibility &MR) {} + virtual Error notifyEmitted(MaterializationResponsibility &MR) { + return Error::success(); + } + virtual Error notifyFailed(MaterializationResponsibility &MR) = 0; + virtual Error notifyRemovingResources(JITDylib &JD, ResourceKey K) = 0; + virtual void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) = 0; + }; + + /// Construct a LinkGraphLinkingLayer using the ExecutorProcessControl + /// instance's memory manager. + LinkGraphLinkingLayer(ExecutionSession &ES); + + /// Construct a LinkGraphLinkingLayer using a custom memory manager. + LinkGraphLinkingLayer(ExecutionSession &ES, + jitlink::JITLinkMemoryManager &MemMgr); + + /// Construct a LinkGraphLinkingLayer. Takes ownership of the given + /// JITLinkMemoryManager. This method is a temporary hack to simplify + /// co-existence with RTDyldObjectLinkingLayer (which also owns its + /// allocators). + LinkGraphLinkingLayer(ExecutionSession &ES, + std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); + + /// Destroy the LinkGraphLinkingLayer. + ~LinkGraphLinkingLayer(); + + /// Add a plugin. + LinkGraphLinkingLayer &addPlugin(std::shared_ptr<Plugin> P) { + std::lock_guard<std::mutex> Lock(LayerMutex); + Plugins.push_back(std::move(P)); + return *this; + } + + /// Remove a plugin. This removal applies only to subsequent links (links + /// already underway will continue to use the plugin), and does not of itself + /// destroy the plugin -- destruction will happen once all shared pointers + /// (including those held by in-progress links) are destroyed. 
+ void removePlugin(Plugin &P) { + std::lock_guard<std::mutex> Lock(LayerMutex); + auto I = llvm::find_if(Plugins, [&](const std::shared_ptr<Plugin> &Elem) { + return Elem.get() == &P; + }); + assert(I != Plugins.end() && "Plugin not present"); + Plugins.erase(I); + } + + /// Emit a LinkGraph. + void emit(std::unique_ptr<MaterializationResponsibility> R, + std::unique_ptr<jitlink::LinkGraph> G) override; + + /// Instructs this LinkGraphLinkingLayer instance to override the symbol flags + /// found in the LinkGraph with the flags supplied by the + /// MaterializationResponsibility instance. This is a workaround to support + /// symbol visibility in COFF, which does not use the libObject's + /// SF_Exported flag. Use only when generating / adding COFF object files. + /// + /// FIXME: We should be able to remove this if/when COFF properly tracks + /// exported symbols. + LinkGraphLinkingLayer & + setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) { + this->OverrideObjectFlags = OverrideObjectFlags; + return *this; + } + + /// If set, this LinkGraphLinkingLayer instance will claim responsibility + /// for any symbols provided by a given object file that were not already in + /// the MaterializationResponsibility instance. Setting this flag allows + /// higher-level program representations (e.g. LLVM IR) to be added based on + /// only a subset of the symbols they provide, without having to write + /// intervening layers to scan and add the additional symbols. This trades + /// diagnostic quality for convenience however: If all symbols are enumerated + /// up-front then clashes can be detected and reported early (and usually + /// deterministically). If this option is set, clashes for the additional + /// symbols may not be detected until late, and detection may depend on + /// the flow of control through JIT'd code. Use with care. 
+ LinkGraphLinkingLayer & + setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) { + this->AutoClaimObjectSymbols = AutoClaimObjectSymbols; + return *this; + } + +protected: + /// Emit a LinkGraph with the given backing buffer. + /// + /// This overload is intended for use by ObjectLinkingLayer. + void emit(std::unique_ptr<MaterializationResponsibility> R, + std::unique_ptr<jitlink::LinkGraph> G, + std::unique_ptr<MemoryBuffer> ObjBuf); + + std::function<void(std::unique_ptr<MemoryBuffer>)> ReturnObjectBuffer; + +private: + using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; + + Error recordFinalizedAlloc(MaterializationResponsibility &MR, + FinalizedAlloc FA); + + Error handleRemoveResources(JITDylib &JD, ResourceKey K) override; + void handleTransferResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) override; + + mutable std::mutex LayerMutex; + jitlink::JITLinkMemoryManager &MemMgr; + std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgrOwnership; + bool OverrideObjectFlags = false; + bool AutoClaimObjectSymbols = false; + DenseMap<ResourceKey, std::vector<FinalizedAlloc>> Allocs; + std::vector<std::shared_ptr<Plugin>> Plugins; +}; + +class EHFrameRegistrationPlugin : public LinkGraphLinkingLayer::Plugin { +public: + EHFrameRegistrationPlugin( + ExecutionSession &ES, + std::unique_ptr<jitlink::EHFrameRegistrar> Registrar); + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &PassConfig) override; + Error notifyEmitted(MaterializationResponsibility &MR) override; + Error notifyFailed(MaterializationResponsibility &MR) override; + Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override; + void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, + ResourceKey SrcKey) override; + +private: + std::mutex EHFramePluginMutex; + ExecutionSession &ES; + std::unique_ptr<jitlink::EHFrameRegistrar> Registrar; + 
DenseMap<MaterializationResponsibility *, ExecutorAddrRange> InProcessLinks; + DenseMap<ResourceKey, std::vector<ExecutorAddrRange>> EHFrameRanges; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_LINKGRAPHLINKINGLAYER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h index 6ffd286c365a..8e29f219774b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h @@ -460,8 +460,8 @@ private: return; StrTab.resize(Strings.size()); - for (auto &KV : Strings) - StrTab[KV.second] = {KV.first, 0}; + for (auto &[Str, Idx] : Strings) + StrTab[Idx] = {Str, 0}; size_t Offset = 0; for (auto &Elem : StrTab) { Elem.Offset = Offset; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 19f935d66582..1f11d9f61f6a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -176,6 +176,11 @@ public: static ArrayRef<std::pair<const char *, const char *>> standardRuntimeUtilityAliases(); + /// Returns a list of aliases required to enable lazy compilation via the + /// ORC runtime. 
+ static ArrayRef<std::pair<const char *, const char *>> + standardLazyCompilationAliases(); + private: using SymbolTableVector = SmallVector< std::tuple<ExecutorAddr, ExecutorAddr, MachOExecutorSymbolFlags>>; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 6e0c46bfa8c4..b392c5bf6714 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -16,18 +16,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/JITLink/JITLink.h" -#include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Layer.h" +#include "llvm/ExecutionEngine/Orc/LinkGraphLinkingLayer.h" #include "llvm/Support/Error.h" -#include <algorithm> -#include <cassert> -#include <functional> -#include <list> + #include <memory> -#include <utility> -#include <vector> namespace llvm { @@ -39,67 +33,41 @@ class Symbol; namespace orc { -class ObjectLinkingLayerJITLinkContext; - /// An ObjectLayer implementation built on JITLink. /// /// Clients can use this class to add relocatable object files to an /// ExecutionSession, and it typically serves as the base layer (underneath /// a compiling layer like IRCompileLayer) for the rest of the JIT. -class ObjectLinkingLayer : public RTTIExtends<ObjectLinkingLayer, ObjectLayer>, - private ResourceManager { - friend class ObjectLinkingLayerJITLinkContext; +class ObjectLinkingLayer : public LinkGraphLinkingLayer, + public RTTIExtends<ObjectLinkingLayer, ObjectLayer> { +private: + using BaseObjectLayer = RTTIExtends<ObjectLinkingLayer, ObjectLayer>; public: static char ID; - /// Plugin instances can be added to the ObjectLinkingLayer to receive - /// callbacks when code is loaded or emitted, and when JITLink is being - /// configured. 
- class Plugin { - public: - virtual ~Plugin(); - virtual void modifyPassConfig(MaterializationResponsibility &MR, - jitlink::LinkGraph &G, - jitlink::PassConfiguration &Config) {} - - // Deprecated. Don't use this in new code. There will be a proper mechanism - // for capturing object buffers. - virtual void notifyMaterializing(MaterializationResponsibility &MR, - jitlink::LinkGraph &G, - jitlink::JITLinkContext &Ctx, - MemoryBufferRef InputObject) {} - - virtual void notifyLoaded(MaterializationResponsibility &MR) {} - virtual Error notifyEmitted(MaterializationResponsibility &MR) { - return Error::success(); - } - virtual Error notifyFailed(MaterializationResponsibility &MR) = 0; - virtual Error notifyRemovingResources(JITDylib &JD, ResourceKey K) = 0; - virtual void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, - ResourceKey SrcKey) = 0; - }; - using ReturnObjectBufferFunction = std::function<void(std::unique_ptr<MemoryBuffer>)>; /// Construct an ObjectLinkingLayer using the ExecutorProcessControl /// instance's memory manager. - ObjectLinkingLayer(ExecutionSession &ES); + ObjectLinkingLayer(ExecutionSession &ES) + : LinkGraphLinkingLayer(ES), BaseObjectLayer(ES) {} /// Construct an ObjectLinkingLayer using a custom memory manager. ObjectLinkingLayer(ExecutionSession &ES, - jitlink::JITLinkMemoryManager &MemMgr); + jitlink::JITLinkMemoryManager &MemMgr) + : LinkGraphLinkingLayer(ES, MemMgr), BaseObjectLayer(ES) {} /// Construct an ObjectLinkingLayer. Takes ownership of the given /// JITLinkMemoryManager. This method is a temporary hack to simplify /// co-existence with RTDyldObjectLinkingLayer (which also owns its /// allocators). ObjectLinkingLayer(ExecutionSession &ES, - std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); + std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr) + : LinkGraphLinkingLayer(ES, std::move(MemMgr)), BaseObjectLayer(ES) {} - /// Destruct an ObjectLinkingLayer. 
- ~ObjectLinkingLayer(); + using LinkGraphLinkingLayer::getExecutionSession; /// Set an object buffer return function. By default object buffers are /// deleted once the JIT has linked them. If a return function is set then @@ -108,116 +76,14 @@ public: this->ReturnObjectBuffer = std::move(ReturnObjectBuffer); } - /// Add a plugin. - ObjectLinkingLayer &addPlugin(std::shared_ptr<Plugin> P) { - std::lock_guard<std::mutex> Lock(LayerMutex); - Plugins.push_back(std::move(P)); - return *this; - } + using LinkGraphLinkingLayer::add; + using LinkGraphLinkingLayer::emit; - /// Remove a plugin. This remove applies only to subsequent links (links - /// already underway will continue to use the plugin), and does not of itself - /// destroy the plugin -- destruction will happen once all shared pointers - /// (including those held by in-progress links) are destroyed. - void removePlugin(Plugin &P) { - std::lock_guard<std::mutex> Lock(LayerMutex); - auto I = llvm::find_if(Plugins, [&](const std::shared_ptr<Plugin> &Elem) { - return Elem.get() == &P; - }); - assert(I != Plugins.end() && "Plugin not present"); - Plugins.erase(I); - } - - /// Add a LinkGraph to the JITDylib targeted by the given tracker. - Error add(ResourceTrackerSP, std::unique_ptr<jitlink::LinkGraph> G); - - /// Add a LinkGraph to the given JITDylib. - Error add(JITDylib &JD, std::unique_ptr<jitlink::LinkGraph> G) { - return add(JD.getDefaultResourceTracker(), std::move(G)); - } - - // Un-hide ObjectLayer add methods. using ObjectLayer::add; /// Emit an object file. void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) override; - - /// Emit a LinkGraph. - void emit(std::unique_ptr<MaterializationResponsibility> R, - std::unique_ptr<jitlink::LinkGraph> G); - - /// Instructs this ObjectLinkingLayer instance to override the symbol flags - /// found in the AtomGraph with the flags supplied by the - /// MaterializationResponsibility instance. 
This is a workaround to support - /// symbol visibility in COFF, which does not use the libObject's - /// SF_Exported flag. Use only when generating / adding COFF object files. - /// - /// FIXME: We should be able to remove this if/when COFF properly tracks - /// exported symbols. - ObjectLinkingLayer & - setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) { - this->OverrideObjectFlags = OverrideObjectFlags; - return *this; - } - - /// If set, this ObjectLinkingLayer instance will claim responsibility - /// for any symbols provided by a given object file that were not already in - /// the MaterializationResponsibility instance. Setting this flag allows - /// higher-level program representations (e.g. LLVM IR) to be added based on - /// only a subset of the symbols they provide, without having to write - /// intervening layers to scan and add the additional symbols. This trades - /// diagnostic quality for convenience however: If all symbols are enumerated - /// up-front then clashes can be detected and reported early (and usually - /// deterministically). If this option is set, clashes for the additional - /// symbols may not be detected until late, and detection may depend on - /// the flow of control through JIT'd code. Use with care. 
- ObjectLinkingLayer & - setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) { - this->AutoClaimObjectSymbols = AutoClaimObjectSymbols; - return *this; - } - -private: - using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; - - Error recordFinalizedAlloc(MaterializationResponsibility &MR, - FinalizedAlloc FA); - - Error handleRemoveResources(JITDylib &JD, ResourceKey K) override; - void handleTransferResources(JITDylib &JD, ResourceKey DstKey, - ResourceKey SrcKey) override; - - mutable std::mutex LayerMutex; - jitlink::JITLinkMemoryManager &MemMgr; - std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgrOwnership; - bool OverrideObjectFlags = false; - bool AutoClaimObjectSymbols = false; - ReturnObjectBufferFunction ReturnObjectBuffer; - DenseMap<ResourceKey, std::vector<FinalizedAlloc>> Allocs; - std::vector<std::shared_ptr<Plugin>> Plugins; -}; - -class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin { -public: - EHFrameRegistrationPlugin( - ExecutionSession &ES, - std::unique_ptr<jitlink::EHFrameRegistrar> Registrar); - void modifyPassConfig(MaterializationResponsibility &MR, - jitlink::LinkGraph &G, - jitlink::PassConfiguration &PassConfig) override; - Error notifyEmitted(MaterializationResponsibility &MR) override; - Error notifyFailed(MaterializationResponsibility &MR) override; - Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override; - void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, - ResourceKey SrcKey) override; - -private: - std::mutex EHFramePluginMutex; - ExecutionSession &ES; - std::unique_ptr<jitlink::EHFrameRegistrar> Registrar; - DenseMap<MaterializationResponsibility *, ExecutorAddrRange> InProcessLinks; - DenseMap<ResourceKey, std::vector<ExecutorAddrRange>> EHFrameRanges; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h index a1a5ffcf3406..f3d4c7693877 
100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h @@ -23,6 +23,7 @@ namespace orc { class RedirectionManager { public: virtual ~RedirectionManager() = default; + /// Change the redirection destination of given symbols to new destination /// symbols. virtual Error redirect(JITDylib &JD, const SymbolMap &NewDests) = 0; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h index 0c549bcbf013..aed43f6308cb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -42,6 +42,7 @@ extern const char *MemoryWriteUInt16sWrapperName; extern const char *MemoryWriteUInt32sWrapperName; extern const char *MemoryWriteUInt64sWrapperName; extern const char *MemoryWriteBuffersWrapperName; +extern const char *MemoryWritePointersWrapperName; extern const char *RegisterEHFrameSectionWrapperName; extern const char *DeregisterEHFrameSectionWrapperName; diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h index 034c134a1373..92fdd376abd1 100644 --- a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h +++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h @@ -10,6 +10,7 @@ #define LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/Support/Endian.h" #include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" diff --git a/llvm/include/llvm/Frontend/Atomic/Atomic.h b/llvm/include/llvm/Frontend/Atomic/Atomic.h index 3942d06144ce..9f46fde6292a 100644 --- a/llvm/include/llvm/Frontend/Atomic/Atomic.h +++ b/llvm/include/llvm/Frontend/Atomic/Atomic.h @@ -1,5 +1,4 @@ -//===--- Atomic.h - Codegen of atomic operations 
-//---------------------------===// +//===--- Atomic.h - Codegen of atomic operations ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,46 +9,39 @@ #ifndef LLVM_FRONTEND_ATOMIC_ATOMIC_H #define LLVM_FRONTEND_ATOMIC_ATOMIC_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/RuntimeLibcalls.h" namespace llvm { - -template <typename IRBuilderTy> struct AtomicInfo { - - IRBuilderTy *Builder; +class AtomicInfo { +protected: + IRBuilderBase *Builder; Type *Ty; uint64_t AtomicSizeInBits; uint64_t ValueSizeInBits; - llvm::Align AtomicAlign; - llvm::Align ValueAlign; + Align AtomicAlign; + Align ValueAlign; bool UseLibcall; public: - AtomicInfo(IRBuilderTy *Builder, Type *Ty, uint64_t AtomicSizeInBits, - uint64_t ValueSizeInBits, llvm::Align AtomicAlign, - llvm::Align ValueAlign, bool UseLibcall) + AtomicInfo(IRBuilderBase *Builder, Type *Ty, uint64_t AtomicSizeInBits, + uint64_t ValueSizeInBits, Align AtomicAlign, Align ValueAlign, + bool UseLibcall) : Builder(Builder), Ty(Ty), AtomicSizeInBits(AtomicSizeInBits), ValueSizeInBits(ValueSizeInBits), AtomicAlign(AtomicAlign), ValueAlign(ValueAlign), UseLibcall(UseLibcall) {} virtual ~AtomicInfo() = default; - llvm::Align getAtomicAlignment() const { return AtomicAlign; } + Align getAtomicAlignment() const { return AtomicAlign; } uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } uint64_t getValueSizeInBits() const { return ValueSizeInBits; } bool shouldUseLibcall() const { return UseLibcall; } - llvm::Type *getAtomicTy() const { return Ty; } + Type *getAtomicTy() const { return Ty; } - virtual llvm::Value *getAtomicPointer() const = 0; + virtual Value *getAtomicPointer() const = 0; virtual 
void decorateWithTBAA(Instruction *I) = 0; - virtual llvm::AllocaInst *CreateAlloca(llvm::Type *Ty, - const llvm::Twine &Name) const = 0; + virtual AllocaInst *CreateAlloca(Type *Ty, const Twine &Name) const = 0; /* * Is the atomic size larger than the underlying value type? @@ -62,90 +54,28 @@ public: LLVMContext &getLLVMContext() const { return Builder->getContext(); } - static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) { - if (ValTy->isFloatingPointTy()) - return ValTy->isX86_FP80Ty() || CmpXchg; - return !ValTy->isIntegerTy() && !ValTy->isPointerTy(); - } + bool shouldCastToInt(Type *ValTy, bool CmpXchg); - llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile, - bool CmpXchg = false) { - Value *Ptr = getAtomicPointer(); - Type *AtomicTy = Ty; - if (shouldCastToInt(Ty, CmpXchg)) - AtomicTy = llvm::IntegerType::get(getLLVMContext(), AtomicSizeInBits); - LoadInst *Load = - Builder->CreateAlignedLoad(AtomicTy, Ptr, AtomicAlign, "atomic-load"); - Load->setAtomic(AO); - if (IsVolatile) - Load->setVolatile(true); - decorateWithTBAA(Load); - return Load; - } + Value *EmitAtomicLoadOp(AtomicOrdering AO, bool IsVolatile, + bool CmpXchg = false); - static CallInst *EmitAtomicLibcall(IRBuilderTy *Builder, StringRef fnName, - Type *ResultType, ArrayRef<Value *> Args) { - LLVMContext &ctx = Builder->getContext(); - SmallVector<Type *, 6> ArgTys; - for (Value *Arg : Args) - ArgTys.push_back(Arg->getType()); - FunctionType *FnType = FunctionType::get(ResultType, ArgTys, false); - Module *M = Builder->GetInsertBlock()->getModule(); - - // TODO: Use llvm::TargetLowering for Libcall ABI - llvm::AttrBuilder fnAttrBuilder(ctx); - fnAttrBuilder.addAttribute(llvm::Attribute::NoUnwind); - fnAttrBuilder.addAttribute(llvm::Attribute::WillReturn); - llvm::AttributeList fnAttrs = llvm::AttributeList::get( - ctx, llvm::AttributeList::FunctionIndex, fnAttrBuilder); - FunctionCallee LibcallFn = M->getOrInsertFunction(fnName, FnType, fnAttrs); - CallInst *Call = 
Builder->CreateCall(LibcallFn, Args); - return Call; - } + CallInst *EmitAtomicLibcall(StringRef fnName, Type *ResultType, + ArrayRef<Value *> Args); - llvm::Value *getAtomicSizeValue() const { + Value *getAtomicSizeValue() const { LLVMContext &ctx = getLLVMContext(); - // TODO: Get from llvm::TargetMachine / clang::TargetInfo - // if clang shares this codegen in future + // if clang shares this codegen in future constexpr uint16_t SizeTBits = 64; constexpr uint16_t BitsPerByte = 8; - return llvm::ConstantInt::get(llvm::IntegerType::get(ctx, SizeTBits), - AtomicSizeInBits / BitsPerByte); + return ConstantInt::get(IntegerType::get(ctx, SizeTBits), + AtomicSizeInBits / BitsPerByte); } - std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall( - llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure) { - LLVMContext &ctx = getLLVMContext(); - - // __atomic_compare_exchange's expected and desired are passed by pointers - // FIXME: types - - // TODO: Get from llvm::TargetMachine / clang::TargetInfo - // if clang shares this codegen in future - constexpr uint64_t IntBits = 32; - - // bool __atomic_compare_exchange(size_t size, void *obj, void *expected, - // void *desired, int success, int failure); - llvm::Value *Args[6] = { - getAtomicSizeValue(), - getAtomicPointer(), - ExpectedVal, - DesiredVal, - llvm::Constant::getIntegerValue( - llvm::IntegerType::get(ctx, IntBits), - llvm::APInt(IntBits, static_cast<uint64_t>(Success), - /*signed=*/true)), - llvm::Constant::getIntegerValue( - llvm::IntegerType::get(ctx, IntBits), - llvm::APInt(IntBits, static_cast<uint64_t>(Failure), - /*signed=*/true)), - }; - auto Result = EmitAtomicLibcall(Builder, "__atomic_compare_exchange", - llvm::IntegerType::getInt1Ty(ctx), Args); - return std::make_pair(ExpectedVal, Result); - } + std::pair<Value *, Value *> + EmitAtomicCompareExchangeLibcall(Value *ExpectedVal, Value *DesiredVal, + AtomicOrdering Success, + 
AtomicOrdering Failure); Value *castToAtomicIntPointer(Value *addr) const { return addr; // opaque pointer @@ -155,77 +85,17 @@ public: return castToAtomicIntPointer(getAtomicPointer()); } - std::pair<llvm::Value *, llvm::Value *> - EmitAtomicCompareExchangeOp(llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success, - llvm::AtomicOrdering Failure, - bool IsVolatile = false, bool IsWeak = false) { - // Do the atomic store. - Value *Addr = getAtomicAddressAsAtomicIntPointer(); - auto *Inst = Builder->CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal, - getAtomicAlignment(), Success, - Failure, llvm::SyncScope::System); - // Other decoration. - Inst->setVolatile(IsVolatile); - Inst->setWeak(IsWeak); - - auto *PreviousVal = Builder->CreateExtractValue(Inst, /*Idxs=*/0); - auto *SuccessFailureVal = Builder->CreateExtractValue(Inst, /*Idxs=*/1); - return std::make_pair(PreviousVal, SuccessFailureVal); - } + std::pair<Value *, Value *> + EmitAtomicCompareExchangeOp(Value *ExpectedVal, Value *DesiredVal, + AtomicOrdering Success, AtomicOrdering Failure, + bool IsVolatile = false, bool IsWeak = false); - std::pair<llvm::Value *, llvm::Value *> - EmitAtomicCompareExchange(llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success, - llvm::AtomicOrdering Failure, bool IsVolatile, - bool IsWeak) { - if (shouldUseLibcall()) - return EmitAtomicCompareExchangeLibcall(ExpectedVal, DesiredVal, Success, - Failure); - - auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success, - Failure, IsVolatile, IsWeak); - return Res; - } + std::pair<Value *, Value *> + EmitAtomicCompareExchange(Value *ExpectedVal, Value *DesiredVal, + AtomicOrdering Success, AtomicOrdering Failure, + bool IsVolatile, bool IsWeak); - // void __atomic_load(size_t size, void *mem, void *return, int order); - std::pair<llvm::LoadInst *, llvm::AllocaInst *> - EmitAtomicLoadLibcall(llvm::AtomicOrdering AO) { - LLVMContext &Ctx = getLLVMContext(); - Type 
*SizedIntTy = Type::getIntNTy(Ctx, getAtomicSizeInBits()); - Type *ResultTy; - SmallVector<Value *, 6> Args; - AttributeList Attr; - Module *M = Builder->GetInsertBlock()->getModule(); - const DataLayout &DL = M->getDataLayout(); - Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), - this->getAtomicSizeInBits() / 8)); - - Value *PtrVal = getAtomicPointer(); - PtrVal = Builder->CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx)); - Args.push_back(PtrVal); - AllocaInst *AllocaResult = - CreateAlloca(Ty, getAtomicPointer()->getName() + "atomic.temp.load"); - const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); - AllocaResult->setAlignment(AllocaAlignment); - Args.push_back(AllocaResult); - Constant *OrderingVal = - ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(AO)); - Args.push_back(OrderingVal); - - ResultTy = Type::getVoidTy(Ctx); - SmallVector<Type *, 6> ArgTys; - for (Value *Arg : Args) - ArgTys.push_back(Arg->getType()); - FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); - FunctionCallee LibcallFn = - M->getOrInsertFunction("__atomic_load", FnType, Attr); - CallInst *Call = Builder->CreateCall(LibcallFn, Args); - Call->setAttributes(Attr); - return std::make_pair( - Builder->CreateAlignedLoad(Ty, AllocaResult, AllocaAlignment), - AllocaResult); - } + std::pair<LoadInst *, AllocaInst *> EmitAtomicLoadLibcall(AtomicOrdering AO); }; } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 67632fb79f8a..3e22b6ff71c8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -758,15 +758,13 @@ template <typename T, typename I, typename E> // struct LinearT { // std::get<type> won't work here due to duplicate types in the tuple. using List = ObjectListT<I, E>; - using StepSimpleModifier = E; + // StepSimpleModifier is same as StepComplexModifier. 
using StepComplexModifier = E; ENUM(LinearModifier, Ref, Val, Uval); using TupleTrait = std::true_type; // Step == nullopt means 1. - std::tuple<OPT(StepSimpleModifier), OPT(StepComplexModifier), - OPT(LinearModifier), List> - t; + std::tuple<OPT(StepComplexModifier), OPT(LinearModifier), List> t; }; // V5.2: [5.8.5] `link` clause diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 4bdfa1cf4c14..20fb581ee631 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -239,6 +239,8 @@ private: bool applyClause(const tomp::clause::OmpxAttributeT<TypeTy, IdTy, ExprTy> &clause, const ClauseTy *); + bool applyClause(const tomp::clause::OmpxBareT<TypeTy, IdTy, ExprTy> &clause, + const ClauseTy *); uint32_t version; llvm::omp::Directive construct; @@ -1105,6 +1107,13 @@ bool ConstructDecompositionT<C, H>::applyClause( template <typename C, typename H> bool ConstructDecompositionT<C, H>::applyClause( + const tomp::clause::OmpxBareT<TypeTy, IdTy, ExprTy> &clause, + const ClauseTy *node) { + return applyToOutermost(node); +} + +template <typename C, typename H> +bool ConstructDecompositionT<C, H>::applyClause( const tomp::clause::OmpxAttributeT<TypeTy, IdTy, ExprTy> &clause, const ClauseTy *node) { return applyToAll(node); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index bd7fb2361aae..e36eb77cefe7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -66,6 +66,7 @@ def OMPC_AppendArgs : Clause<"append_args"> { } def OMPC_At : Clause<"at"> { let clangClass = "OMPAtClause"; + let flangClass = "OmpAtClause"; } def OMPC_AtomicDefaultMemOrder : Clause<"atomic_default_mem_order"> { let clangClass = "OMPAtomicDefaultMemOrderClause"; @@ -177,6 +178,7 @@ def OMPC_Exclusive : Clause<"exclusive"> { } def OMPC_Fail : 
Clause<"fail"> { let clangClass = "OMPFailClause"; + let flangClass = "OmpFailClause"; } def OMPC_Filter : Clause<"filter"> { let clangClass = "OMPFilterClause"; @@ -287,6 +289,7 @@ def OMPC_Mergeable : Clause<"mergeable"> { } def OMPC_Message : Clause<"message"> { let clangClass = "OMPMessageClause"; + let flangClass = "OmpMessageClause"; } def OMPC_NoOpenMP : Clause<"no_openmp"> { let clangClass = "OMPNoOpenMPClause"; @@ -444,6 +447,7 @@ def OMPC_SeqCst : Clause<"seq_cst"> { } def OMPC_Severity : Clause<"severity"> { let clangClass = "OMPSeverityClause"; + let flangClass = "OmpSeverityClause"; } def OMPC_Shared : Clause<"shared"> { let clangClass = "OMPSharedClause"; @@ -463,7 +467,7 @@ def OMPC_Sizes: Clause<"sizes"> { } def OMPC_TaskReduction : Clause<"task_reduction"> { let clangClass = "OMPTaskReductionClause"; - let flangClass = "OmpReductionClause"; + let flangClass = "OmpTaskReductionClause"; } def OMPC_ThreadLimit : Clause<"thread_limit"> { let clangClass = "OMPThreadLimitClause"; @@ -1017,6 +1021,7 @@ def OMP_Target : Directive<"target"> { VersionedClause<OMPC_Device>, VersionedClause<OMPC_If>, VersionedClause<OMPC_NoWait>, + VersionedClause<OMPC_OMPX_Bare>, VersionedClause<OMPC_OMPX_DynCGroupMem>, VersionedClause<OMPC_ThreadLimit, 51>, ]; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a97deafa3683..4ce47b1c05d9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -480,16 +480,15 @@ public: T(Triple(M.getTargetTriple())) {} ~OpenMPIRBuilder(); - class AtomicInfo : public llvm::AtomicInfo<IRBuilder<>> { + class AtomicInfo : public llvm::AtomicInfo { llvm::Value *AtomicVar; public: AtomicInfo(IRBuilder<> *Builder, llvm::Type *Ty, uint64_t AtomicSizeInBits, uint64_t ValueSizeInBits, llvm::Align AtomicAlign, llvm::Align ValueAlign, bool UseLibcall, llvm::Value *AtomicVar) - : llvm::AtomicInfo<IRBuilder<>>(Builder, Ty, 
AtomicSizeInBits, - ValueSizeInBits, AtomicAlign, - ValueAlign, UseLibcall), + : llvm::AtomicInfo(Builder, Ty, AtomicSizeInBits, ValueSizeInBits, + AtomicAlign, ValueAlign, UseLibcall), AtomicVar(AtomicVar) {} llvm::Value *getAtomicPointer() const override { return AtomicVar; } @@ -1262,12 +1261,15 @@ public: /// cannot be resumed until execution of the structured /// block that is associated with the generated task is /// completed. + /// \param EventHandle If present, signifies the event handle as part of + /// the detach clause /// \param Mergeable If the given task is `mergeable` InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr, - SmallVector<DependData> Dependencies = {}, bool Mergeable = false); + SmallVector<DependData> Dependencies = {}, bool Mergeable = false, + Value *EventHandle = nullptr); /// Generator for the taskgroup construct /// @@ -2855,6 +2857,67 @@ public: using GenMapInfoCallbackTy = function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; +private: + /// Emit the array initialization or deletion portion for user-defined mapper + /// code generation. First, it evaluates whether an array section is mapped + /// and whether the \a MapType instructs to delete this section. If \a IsInit + /// is true, and \a MapType indicates to not delete this array, array + /// initialization code is generated. If \a IsInit is false, and \a MapType + /// indicates to delete this array, array deletion code is generated. + void emitUDMapperArrayInitOrDel(Function *MapperFn, llvm::Value *MapperHandle, + llvm::Value *Base, llvm::Value *Begin, + llvm::Value *Size, llvm::Value *MapType, + llvm::Value *MapName, TypeSize ElementSize, + llvm::BasicBlock *ExitBB, bool IsInit); + +public: + /// Emit the user-defined mapper function. The code generation follows the + /// pattern in the example below. 
+ /// \code + /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, + /// void *base, void *begin, + /// int64_t size, int64_t type, + /// void *name = nullptr) { + /// // Allocate space for an array section first or add a base/begin for + /// // pointer dereference. + /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && + /// !maptype.IsDelete) + /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, + /// size*sizeof(Ty), clearToFromMember(type)); + /// // Map members. + /// for (unsigned i = 0; i < size; i++) { + /// // For each component specified by this mapper: + /// for (auto c : begin[i]->all_components) { + /// if (c.hasMapper()) + /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, + /// c.arg_size, + /// c.arg_type, c.arg_name); + /// else + /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, + /// c.arg_begin, c.arg_size, c.arg_type, + /// c.arg_name); + /// } + /// } + /// // Delete the array section. + /// if (size > 1 && maptype.IsDelete) + /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, + /// size*sizeof(Ty), clearToFromMember(type)); + /// } + /// \endcode + /// + /// \param PrivAndGenMapInfoCB Callback that privatizes code and populates the + /// MapInfos and returns. + /// \param ElemTy DeclareMapper element type. + /// \param FuncName Optional param to specify mapper function name. + /// \param CustomMapperCB Optional callback to generate code related to + /// custom mappers. + Function *emitUserDefinedMapper( + function_ref<MapInfosTy &(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, + llvm::Value *BeginArg)> + PrivAndGenMapInfoCB, + llvm::Type *ElemTy, StringRef FuncName, + function_ref<bool(unsigned int, Function **)> CustomMapperCB = nullptr); + /// Generator for '#omp target data' /// /// \param Loc The location where the target data construct was encountered. 
@@ -3092,15 +3155,6 @@ private: AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr); - std::pair<llvm::LoadInst *, llvm::AllocaInst *> - EmitAtomicLoadLibcall(Value *X, Type *XElemTy, llvm::AtomicOrdering AO, - uint64_t AtomicSizeInBits); - - std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall( - Value *X, Type *XElemTy, uint64_t AtomicSizeInBits, - llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure); - /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 . /// /// \Return The instruction diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 49f4527bde66..61955cf883c3 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -317,6 +317,9 @@ def SanitizeAddress : EnumAttr<"sanitize_address", IntersectPreserve, [FnAttr]>; /// ThreadSanitizer is on. def SanitizeThread : EnumAttr<"sanitize_thread", IntersectPreserve, [FnAttr]>; +/// TypeSanitizer is on. +def SanitizeType : EnumAttr<"sanitize_type", IntersectPreserve, [FnAttr]>; + /// MemorySanitizer is on. def SanitizeMemory : EnumAttr<"sanitize_memory", IntersectPreserve, [FnAttr]>; @@ -425,6 +428,7 @@ class CompatRuleStrAttr<string F, string Attr> : CompatRule<F> { def : CompatRule<"isEqual<SanitizeAddressAttr>">; def : CompatRule<"isEqual<SanitizeThreadAttr>">; +def : CompatRule<"isEqual<SanitizeTypeAttr>">; def : CompatRule<"isEqual<SanitizeMemoryAttr>">; def : CompatRule<"isEqual<SanitizeHWAddressAttr>">; def : CompatRule<"isEqual<SanitizeMemTagAttr>">; diff --git a/llvm/include/llvm/IR/CmpPredicate.h b/llvm/include/llvm/IR/CmpPredicate.h index 4b1be7beb2b6..9aa1449465f5 100644 --- a/llvm/include/llvm/IR/CmpPredicate.h +++ b/llvm/include/llvm/IR/CmpPredicate.h @@ -24,6 +24,9 @@ class CmpPredicate { bool HasSameSign; public: + /// Default constructor. 
+ CmpPredicate() : Pred(CmpInst::BAD_ICMP_PREDICATE), HasSameSign(false) {} + /// Constructed implictly with a either Predicate and samesign information, or /// just a Predicate, dropping samesign information. CmpPredicate(CmpInst::Predicate Pred, bool HasSameSign = false) @@ -52,10 +55,22 @@ public: /// An operator== on the underlying Predicate. bool operator==(CmpInst::Predicate P) const { return Pred == P; } + bool operator!=(CmpInst::Predicate P) const { return Pred != P; } /// There is no operator== defined on CmpPredicate. Use getMatching instead to /// get the canonicalized matching CmpPredicate. bool operator==(CmpPredicate) const = delete; + bool operator!=(CmpPredicate) const = delete; + + /// Do a ICmpInst::getCmpPredicate() or CmpInst::getPredicate(), as + /// appropriate. + static CmpPredicate get(const CmpInst *Cmp); + + /// Get the swapped predicate of a CmpPredicate. + static CmpPredicate getSwapped(CmpPredicate P); + + /// Get the swapped predicate of a CmpInst. + static CmpPredicate getSwapped(const CmpInst *Cmp); }; } // namespace llvm diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 93bd519f5727..2ad080e6d0cd 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -330,9 +330,6 @@ public: /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - /// Returns the maximum index size over all address spaces. - unsigned getMaxIndexSize() const; - // Index size in bytes used for address calculation, /// rounded up to a whole number of bytes. unsigned getIndexSize(unsigned AS) const; @@ -368,11 +365,6 @@ public: return getPointerSpec(AS).BitWidth; } - /// Returns the maximum index size over all address spaces. - unsigned getMaxIndexSizeInBits() const { - return getMaxIndexSize() * 8; - } - /// Size in bits of index used for address calculation in getelementptr. 
unsigned getIndexSizeInBits(unsigned AS) const { return getPointerSpec(AS).IndexBitWidth; diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 0abff016b777..694785317af0 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -58,7 +58,10 @@ enum DiagnosticSeverity : char { /// Defines the different supported kind of a diagnostic. /// This enum should be extended with a new ID for each added concrete subclass. enum DiagnosticKind { + DK_Generic, + DK_GenericWithLoc, DK_InlineAsm, + DK_RegAllocFailure, DK_ResourceLimit, DK_StackSize, DK_Linker, @@ -134,6 +137,33 @@ public: using DiagnosticHandlerFunction = std::function<void(const DiagnosticInfo &)>; +class DiagnosticInfoGeneric : public DiagnosticInfo { + const Twine &MsgStr; + const Instruction *Inst = nullptr; + +public: + /// \p MsgStr is the message to be reported to the frontend. + /// This class does not copy \p MsgStr, therefore the reference must be valid + /// for the whole life time of the Diagnostic. + DiagnosticInfoGeneric(const Twine &MsgStr, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Generic, Severity), MsgStr(MsgStr) {} + + DiagnosticInfoGeneric(const Instruction *I, const Twine &ErrMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Generic, Severity), MsgStr(ErrMsg), Inst(I) {} + + const Twine &getMsgStr() const { return MsgStr; } + const Instruction *getInstruction() const { return Inst; } + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_Generic; + } +}; + /// Diagnostic information for inline asm reporting. /// This is basically a message and an optional location. class DiagnosticInfoInlineAsm : public DiagnosticInfo { @@ -146,21 +176,12 @@ private: const Instruction *Instr = nullptr; public: - /// \p MsgStr is the message to be reported to the frontend. 
- /// This class does not copy \p MsgStr, therefore the reference must be valid - /// for the whole life time of the Diagnostic. - DiagnosticInfoInlineAsm(const Twine &MsgStr, - DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_InlineAsm, Severity), MsgStr(MsgStr) {} - /// \p LocCookie if non-zero gives the line number for this report. /// \p MsgStr gives the message. /// This class does not copy \p MsgStr, therefore the reference must be valid /// for the whole life time of the Diagnostic. DiagnosticInfoInlineAsm(uint64_t LocCookie, const Twine &MsgStr, - DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(LocCookie), - MsgStr(MsgStr) {} + DiagnosticSeverity Severity = DS_Error); /// \p Instr gives the original instruction that triggered the diagnostic. /// \p MsgStr gives the message. @@ -354,6 +375,57 @@ private: DiagnosticLocation Loc; }; +class DiagnosticInfoGenericWithLoc : public DiagnosticInfoWithLocationBase { +private: + /// Message to be reported. + const Twine &MsgStr; + +public: + /// \p MsgStr is the message to be reported to the frontend. + /// This class does not copy \p MsgStr, therefore the reference must be valid + /// for the whole life time of the Diagnostic. + DiagnosticInfoGenericWithLoc(const Twine &MsgStr, const Function &Fn, + const DiagnosticLocation &Loc, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfoWithLocationBase(DK_GenericWithLoc, Severity, Fn, Loc), + MsgStr(MsgStr) {} + + const Twine &getMsgStr() const { return MsgStr; } + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_GenericWithLoc; + } +}; + +class DiagnosticInfoRegAllocFailure : public DiagnosticInfoWithLocationBase { +private: + /// Message to be reported. + const Twine &MsgStr; + +public: + /// \p MsgStr is the message to be reported to the frontend. 
+ /// This class does not copy \p MsgStr, therefore the reference must be valid + /// for the whole life time of the Diagnostic. + DiagnosticInfoRegAllocFailure(const Twine &MsgStr, const Function &Fn, + const DiagnosticLocation &DL, + DiagnosticSeverity Severity = DS_Error); + + DiagnosticInfoRegAllocFailure(const Twine &MsgStr, const Function &Fn, + DiagnosticSeverity Severity = DS_Error); + + const Twine &getMsgStr() const { return MsgStr; } + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_RegAllocFailure; + } +}; + /// Diagnostic information for stack size etc. reporting. /// This is basically a function and a size. class DiagnosticInfoResourceLimit : public DiagnosticInfoWithLocationBase { diff --git a/llvm/include/llvm/IR/InstIterator.h b/llvm/include/llvm/IR/InstIterator.h index 054fe4e9cbe9..8ce73a9b9add 100644 --- a/llvm/include/llvm/IR/InstIterator.h +++ b/llvm/include/llvm/IR/InstIterator.h @@ -104,8 +104,6 @@ public: InstIterator tmp = *this; --*this; return tmp; } - inline bool atEnd() const { return BB == BBs->end(); } - private: inline void advanceToNextBB() { // The only way that the II could be broken is if it is now pointing to diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 730baa8cc005..aa480aa8d986 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -367,6 +367,10 @@ public: /// Return true if this instruction has any metadata attached to it. bool hasMetadata() const { return DbgLoc || Value::hasMetadata(); } + // Return true if this instruction contains loop metadata other than + // a debug location + bool hasNonDebugLocLoopMetadata() const; + /// Return true if this instruction has metadata attached to it other than a /// debug location. 
bool hasMetadataOtherThanDebugLoc() const { return Value::hasMetadata(); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index a42bf6bca1b9..a8df12a1282f 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1226,9 +1226,9 @@ public: return {getSwappedPredicate(Pred), Pred.hasSameSign()}; } - /// @returns the swapped predicate. - Predicate getSwappedCmpPredicate() const { - return getSwappedPredicate(getCmpPredicate()); + /// @returns the swapped predicate along with samesign information. + CmpPredicate getSwappedCmpPredicate() const { + return getSwappedCmpPredicate(getCmpPredicate()); } /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc. diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 89dfff256e0c..82f72131b9d2 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -112,13 +112,6 @@ namespace Intrinsic { Function *getDeclarationIfExists(Module *M, ID id, ArrayRef<Type *> Tys, FunctionType *FT = nullptr); - /// Looks up Name in NameTable via binary search. NameTable must be sorted - /// and all entries must start with "llvm.". If NameTable contains an exact - /// match for Name or a prefix of Name followed by a dot, its index in - /// NameTable is returned. Otherwise, -1 is returned. - int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable, - StringRef Name, StringRef Target = ""); - /// Map a Clang builtin name to an intrinsic ID. 
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 1ca8c2565ab0..ee877349a331 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1655,7 +1655,7 @@ def int_strip_invariant_group : DefaultAttrsIntrinsic<[llvm_anyptr_ty], // def int_experimental_stackmap : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_vararg_ty], - [Throws]>; + [Throws, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_experimental_patchpoint_void : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a91616b95568..53a66099a92b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3813,15 +3813,6 @@ let TargetPrefix = "aarch64" in { LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; - class SME2_FP8_CVT_X2_Single_Intrinsic - : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], - [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; - // - // CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector - // - def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; - def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 @@ -3864,3 +3855,216 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; // Neon absolute maximum and minimum def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; + +// +// FP8 Intrinsics +// +let TargetPrefix = "aarch64" in { + + // SVE Widening Conversions + class SVE2_FP8_Cvt + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_nxv16i8_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def 
int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt; + + // SVE Narrowing Conversions + class SVE2_FP8_Narrow_Cvt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; + def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; + + def int_aarch64_sve_fp8_cvtnt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + // Dot product + class SVE2_FP8_FMLA_FDOT + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + class SVE2_FP8_FMLA_FDOT_Lane + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + + def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; + + // Fused multiply-add + def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane; + + def int_aarch64_sve_fp8_fmlalt : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane; + + def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane; + + def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane; + + def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane; + + def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT; + def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane; + + class 
SME2_FP8_CVT_X2_Single_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_nxv16i8_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + class SME2_FP8_CVT_Single_X4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + class SME_FP8_OuterProduct_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i1_ty, llvm_nxv16i1_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_ZA_LANE_VGx1_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<3>>]>; + + class SME_FP8_ZA_LANE_VGx2_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<4>>]>; + + class SME_FP8_ZA_LANE_VGx4_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<6>>]>; + class SME_FP8_ZA_SINGLE_VGx1_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_ZA_SINGLE_VGx2_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_ZA_SINGLE_VGx4_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_ZA_MULTI_VGx2_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, 
llvm_nxv16i8_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_ZA_MULTI_VGx4_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + // + // CVT from FP8 to half-precision/BFloat16 multi-vector + // + def int_aarch64_sve_fp8_cvt1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; + def int_aarch64_sve_fp8_cvt2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; + + // + // CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector + // + def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; + def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; + + // + // CVT to FP8 from half-precision/BFloat16/single-precision multi-vector + // + def int_aarch64_sve_fp8_cvt_x2 + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvt_x4 : SME2_FP8_CVT_Single_X4_Intrinsic; + def int_aarch64_sve_fp8_cvtn_x4 : SME2_FP8_CVT_Single_X4_Intrinsic; + + // FP8 outer product + def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic; + def int_aarch64_sme_fp8_fmopa_za32 : SME_FP8_OuterProduct_Intrinsic; + + // + // ZA multiply-add + // + // Double-vector groups (F8F16) + def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic; + def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic; + // Single + def int_aarch64_sme_fp8_fmlal_single_za16_vg2x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic; + def int_aarch64_sme_fp8_fmlal_single_za16_vg2x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlal_single_za16_vg2x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic; + // Multi + def 
int_aarch64_sme_fp8_fmlal_multi_za16_vg2x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlal_multi_za16_vg2x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic; + + // Quad-vector groups (F8F32) + def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic; + def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic; + // Single + def int_aarch64_sme_fp8_fmlall_single_za32_vg4x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic; + def int_aarch64_sme_fp8_fmlall_single_za32_vg4x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlall_single_za32_vg4x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic; + // Multi + def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic; + + // + // FP8 FDOT intrinsics + // + // (indexed) + def int_aarch64_sme_fp8_fdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fdot_lane_za32_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + + def int_aarch64_sme_fp8_fdot_lane_za16_vg1x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic; + def int_aarch64_sme_fp8_fdot_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic; + // Single + def int_aarch64_sme_fp8_fdot_single_za16_vg1x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fdot_single_za32_vg1x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic; + + def int_aarch64_sme_fp8_fdot_single_za16_vg1x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic; + def int_aarch64_sme_fp8_fdot_single_za32_vg1x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic; + // Multi + def int_aarch64_sme_fp8_fdot_multi_za16_vg1x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fdot_multi_za32_vg1x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic; + + def int_aarch64_sme_fp8_fdot_multi_za16_vg1x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic; + def int_aarch64_sme_fp8_fdot_multi_za32_vg1x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic; + + // FVDOT + def 
int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 5696345a617f..d31d5afe5145 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -21,7 +21,7 @@ def int_dx_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMe // type appropriate for the kind of resource given a register space ID, lower // bound and range size of the binding, as well as an index and an indicator // whether that index may be non-uniform. -def int_dx_handle_fromBinding +def int_dx_resource_handlefrombinding : DefaultAttrsIntrinsic< [llvm_any_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], @@ -30,22 +30,22 @@ def int_dx_handle_fromBinding def int_dx_resource_getpointer : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty], [IntrNoMem]>; -def int_dx_typedBufferLoad +def int_dx_resource_load_typedbuffer : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>; -def int_dx_typedBufferLoad_checkbit +def int_dx_resource_loadchecked_typedbuffer : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>; -def int_dx_typedBufferStore +def int_dx_resource_store_typedbuffer : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>; -def int_dx_bufferUpdateCounter +def int_dx_resource_updatecounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], [IntrInaccessibleMemOrArgMemOnly]>; // Cast between target extension handle types and dxil-style opaque handles -def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; +def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; def 
int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; @@ -98,6 +98,7 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; +def int_dx_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>; def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>; diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index 29b0da0d766b..0cd6008b986f 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -6705,3 +6705,130 @@ Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf">; def int_hexagon_V6_vsub_sf_bf_128B : Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf_128B">; +// V79 HVX Instructions. 
+ +def int_hexagon_V6_get_qfext : +Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_get_qfext">; + +def int_hexagon_V6_get_qfext_128B : +Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_get_qfext_128B">; + +def int_hexagon_V6_get_qfext_oracc : +Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_get_qfext_oracc">; + +def int_hexagon_V6_get_qfext_oracc_128B : +Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_get_qfext_oracc_128B">; + +def int_hexagon_V6_set_qfext : +Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_set_qfext">; + +def int_hexagon_V6_set_qfext_128B : +Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_set_qfext_128B">; + +def int_hexagon_V6_vabs_f8 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabs_f8">; + +def int_hexagon_V6_vabs_f8_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabs_f8_128B">; + +def int_hexagon_V6_vadd_hf_f8 : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_hf_f8">; + +def int_hexagon_V6_vadd_hf_f8_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_hf_f8_128B">; + +def int_hexagon_V6_vcvt2_b_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt2_b_hf">; + +def int_hexagon_V6_vcvt2_b_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt2_b_hf_128B">; + +def int_hexagon_V6_vcvt2_hf_b : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt2_hf_b">; + +def int_hexagon_V6_vcvt2_hf_b_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt2_hf_b_128B">; + +def int_hexagon_V6_vcvt2_hf_ub : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt2_hf_ub">; + +def int_hexagon_V6_vcvt2_hf_ub_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt2_hf_ub_128B">; + +def int_hexagon_V6_vcvt2_ub_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt2_ub_hf">; + +def int_hexagon_V6_vcvt2_ub_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt2_ub_hf_128B">; + +def int_hexagon_V6_vcvt_f8_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_f8_hf">; + +def int_hexagon_V6_vcvt_f8_hf_128B : 
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_f8_hf_128B">; + +def int_hexagon_V6_vcvt_hf_f8 : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_f8">; + +def int_hexagon_V6_vcvt_hf_f8_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_f8_128B">; + +def int_hexagon_V6_vfmax_f8 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmax_f8">; + +def int_hexagon_V6_vfmax_f8_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmax_f8_128B">; + +def int_hexagon_V6_vfmin_f8 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmin_f8">; + +def int_hexagon_V6_vfmin_f8_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmin_f8_128B">; + +def int_hexagon_V6_vfneg_f8 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vfneg_f8">; + +def int_hexagon_V6_vfneg_f8_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vfneg_f8_128B">; + +def int_hexagon_V6_vmerge_qf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmerge_qf">; + +def int_hexagon_V6_vmerge_qf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmerge_qf_128B">; + +def int_hexagon_V6_vmpy_hf_f8 : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_f8">; + +def int_hexagon_V6_vmpy_hf_f8_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_f8_128B">; + +def int_hexagon_V6_vmpy_hf_f8_acc : +Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_f8_acc">; + +def int_hexagon_V6_vmpy_hf_f8_acc_128B : +Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_f8_acc_128B">; + +def int_hexagon_V6_vmpy_rt_hf : +Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_hf">; + +def int_hexagon_V6_vmpy_rt_hf_128B : +Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_hf_128B">; + +def int_hexagon_V6_vmpy_rt_qf16 : +Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_qf16">; + +def int_hexagon_V6_vmpy_rt_qf16_128B : +Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_qf16_128B">; + +def int_hexagon_V6_vmpy_rt_sf : 
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_sf">; + +def int_hexagon_V6_vmpy_rt_sf_128B : +Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpy_rt_sf_128B">; + +def int_hexagon_V6_vsub_hf_f8 : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_hf_f8">; + +def int_hexagon_V6_vsub_hf_f8_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_hf_f8_128B">; diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 9834dbb70d4c..fd07d131ce15 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4806,6 +4806,21 @@ def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">, [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; // +// WGMMA fence instructions +// +// wgmma.fence.sync.aligned; +def int_nvvm_wgmma_fence_sync_aligned + : Intrinsic<[], [], [IntrConvergent]>; + +// wgmma.commit_group.sync.aligned; +def int_nvvm_wgmma_commit_group_sync_aligned + : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">; + +// wgmma.wait_group.sync.aligned N; +def int_nvvm_wgmma_wait_group_sync_aligned + : Intrinsic<[], [llvm_i64_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">; + +// // WMMA instructions // // WMMA.LOAD diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 17b70062e58f..bcff0f20b985 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -36,8 +36,8 @@ let TargetPrefix = "spv" in { def int_spv_selection_merge : Intrinsic<[], [llvm_vararg_ty]>; def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>; def int_spv_unreachable : Intrinsic<[], []>; - def int_spv_alloca : Intrinsic<[llvm_any_ty], []>; - def int_spv_alloca_array : Intrinsic<[llvm_any_ty], [llvm_anyint_ty]>; + def int_spv_alloca : Intrinsic<[llvm_any_ty], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>; + def 
int_spv_alloca_array : Intrinsic<[llvm_any_ty], [llvm_anyint_ty, llvm_i8_ty], [ImmArg<ArgIndex<1>>]>; def int_spv_undef : Intrinsic<[llvm_i32_ty], []>; def int_spv_inline_asm : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, llvm_vararg_ty]>; @@ -59,6 +59,7 @@ let TargetPrefix = "spv" in { // The following intrinsic(s) are mirrored from IntrinsicsDirectX.td for HLSL support. def int_spv_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; + def int_spv_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; def int_spv_cross : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; @@ -86,6 +87,7 @@ let TargetPrefix = "spv" in { def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; + def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>; def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; @@ -101,7 +103,7 @@ let TargetPrefix = "spv" in { // type appropriate for the kind of resource given the set id, binding id, // array size of the binding, as well as an index and an indicator // whether that index may be non-uniform. 
- def int_spv_handle_fromBinding + def int_spv_resource_handlefrombinding : DefaultAttrsIntrinsic< [llvm_any_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], @@ -110,19 +112,19 @@ let TargetPrefix = "spv" in { def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; - def int_spv_bufferUpdateCounter + def int_spv_resource_updatecounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], [IntrInaccessibleMemOrArgMemOnly]>; // Read a value from the image buffer. It does not translate directly to a // single OpImageRead because the result type is not necessarily a 4 element // vector. - def int_spv_typedBufferLoad + def int_spv_resource_load_typedbuffer : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>; // Write a value to the image buffer. Translates directly to a single // OpImageWrite. - def int_spv_typedBufferStore + def int_spv_resource_store_typedbuffer : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>; } diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 6d4a59ba6b1f..bbd125fd38cf 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -305,7 +305,6 @@ public: /// be prepared to drop the erroneous construct on the floor and "not crash". /// The generated code need not be correct. The error message will be /// implicitly prefixed with "error: " and should not end with a ".". 
- void emitError(uint64_t LocCookie, const Twine &ErrorStr); void emitError(const Instruction *I, const Twine &ErrorStr); void emitError(const Twine &ErrorStr); diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 35580f3f38c6..df2384c5f6e6 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1464,6 +1464,8 @@ public: static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B, const Instruction *AInstr, const Instruction *BInstr); + static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B); + static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B); }; /// Tuple of metadata. diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 2a05c2ac0758..3c586a1dd21d 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1711,10 +1711,9 @@ public: GlobalValue::GUID OrigGUID) { if (OrigGUID == 0 || ValueGUID == OrigGUID) return; - if (OidGuidMap.count(OrigGUID) && OidGuidMap[OrigGUID] != ValueGUID) - OidGuidMap[OrigGUID] = 0; - else - OidGuidMap[OrigGUID] = ValueGUID; + auto [It, Inserted] = OidGuidMap.try_emplace(OrigGUID, ValueGUID); + if (!Inserted && It->second != ValueGUID) + It->second = 0; } /// Find the summary for ValueInfo \p VI in module \p ModuleId, or nullptr if diff --git a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h b/llvm/include/llvm/IR/NVVMIntrinsicFlags.h index 43dde42bbbd6..dfb6e857b3a6 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicFlags.h @@ -15,6 +15,8 @@ #ifndef LLVM_IR_NVVMINTRINSICFLAGS_H #define LLVM_IR_NVVMINTRINSICFLAGS_H +#include <stdint.h> + namespace llvm { namespace nvvm { diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index fc4c0124d00b..cc0e8d598ff1 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -688,7 +688,7 @@ inline 
api_pred_ty<is_lowbit_mask_or_zero> m_LowBitMaskOrZero(const APInt *&V) { } struct icmp_pred_with_threshold { - ICmpInst::Predicate Pred; + CmpPredicate Pred; const APInt *Thr; bool isValue(const APInt &C) { return ICmpInst::compare(C, *Thr, Pred); } }; @@ -1557,16 +1557,16 @@ template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { // Matchers for CmpInst classes // -template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, +template <typename LHS_t, typename RHS_t, typename Class, bool Commutable = false> struct CmpClass_match { - PredicateTy *Predicate; + CmpPredicate *Predicate; LHS_t L; RHS_t R; // The evaluation order is always stable, regardless of Commutability. // The LHS is always matched first. - CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) + CmpClass_match(CmpPredicate &Pred, const LHS_t &LHS, const RHS_t &RHS) : Predicate(&Pred), L(LHS), R(RHS) {} CmpClass_match(const LHS_t &LHS, const RHS_t &RHS) : Predicate(nullptr), L(LHS), R(RHS) {} @@ -1575,12 +1575,13 @@ struct CmpClass_match { if (auto *I = dyn_cast<Class>(V)) { if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { if (Predicate) - *Predicate = I->getPredicate(); + *Predicate = CmpPredicate::get(I); return true; - } else if (Commutable && L.match(I->getOperand(1)) && - R.match(I->getOperand(0))) { + } + if (Commutable && L.match(I->getOperand(1)) && + R.match(I->getOperand(0))) { if (Predicate) - *Predicate = I->getSwappedPredicate(); + *Predicate = CmpPredicate::getSwapped(I); return true; } } @@ -1589,60 +1590,58 @@ struct CmpClass_match { }; template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> -m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); +inline CmpClass_match<LHS, RHS, CmpInst> m_Cmp(CmpPredicate &Pred, const LHS &L, + const RHS &R) { + return CmpClass_match<LHS, RHS, 
CmpInst>(Pred, L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> -m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); +inline CmpClass_match<LHS, RHS, ICmpInst> m_ICmp(CmpPredicate &Pred, + const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, ICmpInst>(Pred, L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> -m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); +inline CmpClass_match<LHS, RHS, FCmpInst> m_FCmp(CmpPredicate &Pred, + const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, FCmpInst>(Pred, L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> -m_Cmp(const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(L, R); +inline CmpClass_match<LHS, RHS, CmpInst> m_Cmp(const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, CmpInst>(L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> -m_ICmp(const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(L, R); +inline CmpClass_match<LHS, RHS, ICmpInst> m_ICmp(const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, ICmpInst>(L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> -m_FCmp(const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(L, R); +inline CmpClass_match<LHS, RHS, FCmpInst> m_FCmp(const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, FCmpInst>(L, R); } // Same as CmpClass, but instead of saving Pred as out output variable, match a // specific input pred for equality. 
-template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, +template <typename LHS_t, typename RHS_t, typename Class, bool Commutable = false> struct SpecificCmpClass_match { - const PredicateTy Predicate; + const CmpPredicate Predicate; LHS_t L; RHS_t R; - SpecificCmpClass_match(PredicateTy Pred, const LHS_t &LHS, const RHS_t &RHS) + SpecificCmpClass_match(CmpPredicate Pred, const LHS_t &LHS, const RHS_t &RHS) : Predicate(Pred), L(LHS), R(RHS) {} template <typename OpTy> bool match(OpTy *V) { if (auto *I = dyn_cast<Class>(V)) { - if (I->getPredicate() == Predicate && L.match(I->getOperand(0)) && - R.match(I->getOperand(1))) + if (CmpPredicate::getMatching(CmpPredicate::get(I), Predicate) && + L.match(I->getOperand(0)) && R.match(I->getOperand(1))) return true; if constexpr (Commutable) { - if (I->getPredicate() == Class::getSwappedPredicate(Predicate) && + if (CmpPredicate::getMatching(CmpPredicate::get(I), + CmpPredicate::getSwapped(Predicate)) && L.match(I->getOperand(1)) && R.match(I->getOperand(0))) return true; } @@ -1653,31 +1652,27 @@ struct SpecificCmpClass_match { }; template <typename LHS, typename RHS> -inline SpecificCmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> -m_SpecificCmp(CmpInst::Predicate MatchPred, const LHS &L, const RHS &R) { - return SpecificCmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>( - MatchPred, L, R); +inline SpecificCmpClass_match<LHS, RHS, CmpInst> +m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R) { + return SpecificCmpClass_match<LHS, RHS, CmpInst>(MatchPred, L, R); } template <typename LHS, typename RHS> -inline SpecificCmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> -m_SpecificICmp(ICmpInst::Predicate MatchPred, const LHS &L, const RHS &R) { - return SpecificCmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>( - MatchPred, L, R); +inline SpecificCmpClass_match<LHS, RHS, ICmpInst> +m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R) { + return 
SpecificCmpClass_match<LHS, RHS, ICmpInst>(MatchPred, L, R); } template <typename LHS, typename RHS> -inline SpecificCmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> -m_c_SpecificICmp(ICmpInst::Predicate MatchPred, const LHS &L, const RHS &R) { - return SpecificCmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>( - MatchPred, L, R); +inline SpecificCmpClass_match<LHS, RHS, ICmpInst, true> +m_c_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R) { + return SpecificCmpClass_match<LHS, RHS, ICmpInst, true>(MatchPred, L, R); } template <typename LHS, typename RHS> -inline SpecificCmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> -m_SpecificFCmp(FCmpInst::Predicate MatchPred, const LHS &L, const RHS &R) { - return SpecificCmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>( - MatchPred, L, R); +inline SpecificCmpClass_match<LHS, RHS, FCmpInst> +m_SpecificFCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R) { + return SpecificCmpClass_match<LHS, RHS, FCmpInst>(MatchPred, L, R); } //===----------------------------------------------------------------------===// @@ -2468,7 +2463,7 @@ struct UAddWithOverflow_match { template <typename OpTy> bool match(OpTy *V) { Value *ICmpLHS, *ICmpRHS; - ICmpInst::Predicate Pred; + CmpPredicate Pred; if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) return false; @@ -2738,16 +2733,15 @@ inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { /// Matches an ICmp with a predicate over LHS and RHS in either order. /// Swaps the predicate if operands are commuted. 
template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> -m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, - R); +inline CmpClass_match<LHS, RHS, ICmpInst, true> +m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R) { + return CmpClass_match<LHS, RHS, ICmpInst, true>(Pred, L, R); } template <typename LHS, typename RHS> -inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> -m_c_ICmp(const LHS &L, const RHS &R) { - return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(L, R); +inline CmpClass_match<LHS, RHS, ICmpInst, true> m_c_ICmp(const LHS &L, + const RHS &R) { + return CmpClass_match<LHS, RHS, ICmpInst, true>(L, R); } /// Matches a specific opcode with LHS and RHS in either order. diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 13a27b58b9cd..8153845b52c7 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -307,22 +307,22 @@ HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") HANDLE_LIBCALL(FMINIMUM_F32, "fminimumf") HANDLE_LIBCALL(FMINIMUM_F64, "fminimum") HANDLE_LIBCALL(FMINIMUM_F80, "fminimuml") -HANDLE_LIBCALL(FMINIMUM_F128, "fminmuml") +HANDLE_LIBCALL(FMINIMUM_F128, "fminimuml") HANDLE_LIBCALL(FMINIMUM_PPCF128, "fminimuml") HANDLE_LIBCALL(FMAXIMUM_F32, "fmaximumf") HANDLE_LIBCALL(FMAXIMUM_F64, "fmaximum") HANDLE_LIBCALL(FMAXIMUM_F80, "fmaximuml") -HANDLE_LIBCALL(FMAXIMUM_F128, "fmaxmuml") +HANDLE_LIBCALL(FMAXIMUM_F128, "fmaximuml") HANDLE_LIBCALL(FMAXIMUM_PPCF128, "fmaximum_numl") HANDLE_LIBCALL(FMINIMUMNUM_F32, "fminimum_numf") HANDLE_LIBCALL(FMINIMUMNUM_F64, "fminimum_num") HANDLE_LIBCALL(FMINIMUMNUM_F80, "fminimum_numl") -HANDLE_LIBCALL(FMINIMUMNUM_F128, "fminmum_numl") +HANDLE_LIBCALL(FMINIMUMNUM_F128, "fminimum_numl") HANDLE_LIBCALL(FMINIMUMNUM_PPCF128, "fminimum_numl") 
HANDLE_LIBCALL(FMAXIMUMNUM_F32, "fmaximum_numf") HANDLE_LIBCALL(FMAXIMUMNUM_F64, "fmaximum_num") HANDLE_LIBCALL(FMAXIMUMNUM_F80, "fmaximum_numl") -HANDLE_LIBCALL(FMAXIMUMNUM_F128, "fmaxmum_numl") +HANDLE_LIBCALL(FMAXIMUMNUM_F128, "fmaximum_numl") HANDLE_LIBCALL(FMAXIMUMNUM_PPCF128, "fmaximum_numl") HANDLE_LIBCALL(LROUND_F32, "lroundf") HANDLE_LIBCALL(LROUND_F64, "lround") diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 945081b77e95..011aedece94a 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -131,7 +131,7 @@ private: public: using iterator_category = std::forward_iterator_tag; - using value_type = UseT *; + using value_type = UseT; using difference_type = std::ptrdiff_t; using pointer = value_type *; using reference = value_type &; @@ -723,12 +723,16 @@ public: bool AllowInvariantGroup = false, function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis = nullptr) const; - Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, - bool AllowNonInbounds, - bool AllowInvariantGroup = false) { + + Value *stripAndAccumulateConstantOffsets( + const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + bool AllowInvariantGroup = false, + function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis = + nullptr) { return const_cast<Value *>( static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets( - DL, Offset, AllowNonInbounds, AllowInvariantGroup)); + DL, Offset, AllowNonInbounds, AllowInvariantGroup, + ExternalAnalysis)); } /// This is a wrapper around stripAndAccumulateConstantOffsets with the diff --git a/llvm/include/llvm/IR/VectorTypeUtils.h b/llvm/include/llvm/IR/VectorTypeUtils.h new file mode 100644 index 000000000000..d24c714f99cb --- /dev/null +++ b/llvm/include/llvm/IR/VectorTypeUtils.h @@ -0,0 +1,94 @@ +//===------- VectorTypeUtils.h - Vector type utility functions -*- C++ -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_VECTORTYPEUTILS_H +#define LLVM_IR_VECTORTYPEUTILS_H + +#include "llvm/IR/DerivedTypes.h" + +namespace llvm { + +/// A helper function for converting Scalar types to vector types. If +/// the incoming type is void, we return void. If the EC represents a +/// scalar, we return the scalar type. +inline Type *toVectorTy(Type *Scalar, ElementCount EC) { + if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar()) + return Scalar; + return VectorType::get(Scalar, EC); +} + +inline Type *toVectorTy(Type *Scalar, unsigned VF) { + return toVectorTy(Scalar, ElementCount::getFixed(VF)); +} + +/// A helper for converting structs of scalar types to structs of vector types. +/// Note: +/// - If \p EC is scalar, \p StructTy is returned unchanged +/// - Only unpacked literal struct types are supported +Type *toVectorizedStructTy(StructType *StructTy, ElementCount EC); + +/// A helper for converting structs of vector types to structs of scalar types. +/// Note: Only unpacked literal struct types are supported. +Type *toScalarizedStructTy(StructType *StructTy); + +/// Returns true if `StructTy` is an unpacked literal struct where all elements +/// are vectors of matching element count. This does not include empty structs. +bool isVectorizedStructTy(StructType *StructTy); + +/// A helper for converting to vectorized types. For scalar types, this is +/// equivalent to calling `toVectorTy`. For struct types, this returns a new +/// struct where each element type has been widened to a vector type. 
+/// Note: +/// - If the incoming type is void, we return void +/// - If \p EC is scalar, \p Ty is returned unchanged +/// - Only unpacked literal struct types are supported +inline Type *toVectorizedTy(Type *Ty, ElementCount EC) { + if (StructType *StructTy = dyn_cast<StructType>(Ty)) + return toVectorizedStructTy(StructTy, EC); + return toVectorTy(Ty, EC); +} + +/// A helper for converting vectorized types to scalarized (non-vector) types. +/// For vector types, this is equivalent to calling .getScalarType(). For struct +/// types, this returns a new struct where each element type has been converted +/// to a scalar type. Note: Only unpacked literal struct types are supported. +inline Type *toScalarizedTy(Type *Ty) { + if (StructType *StructTy = dyn_cast<StructType>(Ty)) + return toScalarizedStructTy(StructTy); + return Ty->getScalarType(); +} + +/// Returns true if `Ty` is a vector type or a struct of vector types where all +/// vector types share the same VF. +inline bool isVectorizedTy(Type *Ty) { + if (StructType *StructTy = dyn_cast<StructType>(Ty)) + return isVectorizedStructTy(StructTy); + return Ty->isVectorTy(); +} + +/// Returns the types contained in `Ty`. For struct types, it returns the +/// elements, all other types are returned directly. +inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) { + if (auto *StructTy = dyn_cast<StructType>(Ty)) + return StructTy->elements(); + return ArrayRef<Type *>(&Ty, 1); +} + +/// Returns the number of vector elements for a vectorized type. 
+inline ElementCount getVectorizedTypeVF(Type *Ty) { + assert(isVectorizedTy(Ty) && "expected vectorized type"); + return cast<VectorType>(getContainedTypes(Ty).front())->getElementCount(); +} + +inline bool isUnpackedStructLiteral(StructType *StructTy) { + return StructTy->isLiteral() && !StructTy->isPacked(); +} + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 7b81c9a8e143..1cb9013bc48c 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -84,7 +84,8 @@ void initializeDAHPass(PassRegistry &); void initializeDCELegacyPassPass(PassRegistry &); void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &); void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &); -void initializeDXILResourceWrapperPassPass(PassRegistry &); +void initializeDXILResourceBindingWrapperPassPass(PassRegistry &); +void initializeDXILResourceTypeWrapperPassPass(PassRegistry &); void initializeDeadMachineInstructionElimPass(PassRegistry &); void initializeDebugifyMachineModulePass(PassRegistry &); void initializeDependenceAnalysisWrapperPassPass(PassRegistry &); @@ -158,7 +159,7 @@ void initializeLiveDebugVariablesWrapperLegacyPass(PassRegistry &); void initializeLiveIntervalsWrapperPassPass(PassRegistry &); void initializeLiveRangeShrinkPass(PassRegistry &); void initializeLiveRegMatrixWrapperLegacyPass(PassRegistry &); -void initializeLiveStacksPass(PassRegistry &); +void initializeLiveStacksWrapperLegacyPass(PassRegistry &); void initializeLiveVariablesWrapperPassPass(PassRegistry &); void initializeLoadStoreOptPass(PassRegistry &); void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 54245ca0b702..ac1970334de0 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -70,7 +70,8 @@ struct ForcePassLinking { 
(void)llvm::createCallGraphViewerPass(); (void)llvm::createCFGSimplificationPass(); (void)llvm::createStructurizeCFGPass(); - (void)llvm::createDXILResourceWrapperPassPass(); + (void)llvm::createDXILResourceBindingWrapperPassPass(); + (void)llvm::createDXILResourceTypeWrapperPassPass(); (void)llvm::createDeadArgEliminationPass(); (void)llvm::createDeadCodeEliminationPass(); (void)llvm::createDependenceAnalysisWrapperPass(); diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index cf31c36cc4ce..f4de106860d3 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -94,9 +94,10 @@ protected: /// constants into comdat sections. bool HasCOFFComdatConstants = false; - /// True if this is an XCOFF target that supports visibility attributes as - /// part of .global, .weak, .extern, and .comm. Default is false. - bool HasVisibilityOnlyWithLinkage = false; + bool IsAIX = false; + + // True if using the HLASM dialect on z/OS. + bool IsHLASM = false; /// This is the maximum possible length of an instruction, which is needed to /// compute the size of an inline asm. Defaults to 4. @@ -110,14 +111,6 @@ protected: /// the current PC. Defaults to false. bool DollarIsPC = false; - /// Allow '.' token, when not referencing an identifier or constant, to refer - /// to the current PC. Defaults to true. - bool DotIsPC = true; - - /// Whether the '*' token refers to the current PC. This is used for the - /// HLASM dialect. - bool StarIsPC = false; - /// This string, if specified, is used to separate instructions from each /// other when on the same line. Defaults to ';' const char *SeparatorString; @@ -126,10 +119,6 @@ protected: /// "#" StringRef CommentString; - /// This indicates whether the comment string is only accepted as a comment - /// at the beginning of statements. Defaults to false. 
- bool RestrictCommentStringToStartOfStatement = false; - /// This indicates whether to allow additional "comment strings" to be lexed /// as a comment. Setting this attribute to true, will ensure that C-style /// line comments (// ..), C-style block comments (/* .. */), and "#" are @@ -138,16 +127,9 @@ protected: /// Default is true. bool AllowAdditionalComments = true; - /// Should we emit the '\t' as the starting indentation marker for GNU inline - /// asm statements. Defaults to true. - bool EmitGNUAsmStartIndentationMarker = true; - /// This is appended to emitted labels. Defaults to ":" const char *LabelSuffix; - /// Emit labels in purely upper case. Defaults to false. - bool EmitLabelsInUpperCase = false; - // Print the EH begin symbol with an assignment. Defaults to false. bool UseAssignmentForEHBegin = false; @@ -209,13 +191,6 @@ protected: /// still be lexed as a comment. bool AllowAtAtStartOfIdentifier = false; - /// This is true if the assembler allows the "#" character at the start of - /// a string to be lexed as an AsmToken::Identifier. - /// If the AsmLexer determines that the string can be lexed as a possible - /// comment, setting this option will have no effect, and the string will - /// still be lexed as a comment. - bool AllowHashAtStartOfIdentifier = false; - /// If this is true, symbol names with invalid characters will be printed in /// quotes. bool SupportsQuotedNames = true; @@ -225,10 +200,6 @@ protected: /// instead. bool UseDataRegionDirectives = false; - /// True if .align is to be used for alignment. Only power-of-two - /// alignment is supported. - bool UseDotAlignForAlignment = false; - /// True if the target supports LEB128 directives. bool HasLEB128Directives = true; @@ -243,11 +214,6 @@ protected: /// "\t.zero\t" const char *ZeroDirective; - /// This should be set to true if the zero directive supports a value to emit - /// other than zero. If this is set to false, the Data*bitsDirective's will be - /// used to emit these bytes. 
Defaults to true. - bool ZeroDirectiveSupportsNonZeroValue = true; - /// This directive allows emission of an ascii string with the standard C /// escape characters embedded into it. If a target doesn't support this, it /// can be set to null. Defaults to "\t.ascii\t" @@ -258,16 +224,6 @@ protected: /// doesn't support this, it can be set to null. Defaults to "\t.asciz\t" const char *AscizDirective; - /// This directive accepts a comma-separated list of bytes for emission as a - /// string of bytes. For targets that do not support this, it shall be set to - /// null. Defaults to null. - const char *ByteListDirective = nullptr; - - /// This directive allows emission of a zero-terminated ascii string without - /// the standard C escape characters embedded into it. If a target doesn't - /// support this, it can be set to null. Defaults to null. - const char *PlainStringDirective = nullptr; - /// Form used for character literals in the assembly syntax. Useful for /// producing strings as byte lists. If a target does not use or support /// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown. @@ -348,16 +304,6 @@ protected: /// argument and how it is interpreted. Defaults to NoAlignment. LCOMM::LCOMMType LCOMMDirectiveAlignmentType = LCOMM::NoAlignment; - /// True if the target only has basename for .file directive. False if the - /// target also needs the directory along with the basename. Defaults to true. - bool HasBasenameOnlyForFileDirective = true; - - /// True if the target represents string constants as mostly raw characters in - /// paired double quotation with paired double quotation marks as the escape - /// mechanism to represent a double quotation mark within the string. Defaults - /// to false. - bool HasPairedDoubleQuoteStringConstants = false; - // True if the target allows .align directives on functions. This is true for // most targets, so defaults to true. 
bool HasFunctionAlignment = true; @@ -370,10 +316,6 @@ protected: /// for ELF targets. Defaults to true. bool HasSingleParameterDotFile = true; - /// True if the target has a four strings .file directive, strings separated - /// by comma. Defaults to false. - bool HasFourStringsDotFile = false; - /// True if the target has a .ident directive, this is true for ELF targets. /// Defaults to false. bool HasIdentDirective = false; @@ -440,18 +382,10 @@ protected: /// absolute difference. bool DwarfFDESymbolsUseAbsDiff = false; - /// True if the target supports generating the DWARF line table through using - /// the .loc/.file directives. Defaults to true. - bool UsesDwarfFileAndLocDirectives = true; - /// True if DWARF `.file directory' directive syntax is used by /// default. bool EnableDwarfFileDirectoryDefault = true; - /// True if the target needs the DWARF section length in the header (if any) - /// of the DWARF section in the assembly file. Defaults to true. - bool DwarfSectionSizeRequired = true; - /// True if dwarf register numbers are printed instead of symbolic register /// names in .cfi_* directives. Defaults to false. bool DwarfRegNumForCFI = false; @@ -507,9 +441,6 @@ protected: // If true, use Motorola-style integers in Assembly (ex. $0ac). bool UseMotorolaIntegers = false; - // If true, emit function descriptor symbol on AIX. - bool NeedsFunctionDescriptors = false; - public: explicit MCAsmInfo(); virtual ~MCAsmInfo(); @@ -590,12 +521,11 @@ public: // Accessors. + bool isAIX() const { return IsAIX; } + bool isHLASM() const { return IsHLASM; } bool isMachO() const { return HasSubsectionsViaSymbols; } bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; } bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; } - bool hasVisibilityOnlyWithLinkage() const { - return HasVisibilityOnlyWithLinkage; - } /// Returns the maximum possible encoded instruction size in bytes. 
If \p STI /// is null, this should be the maximum size for any subtarget. @@ -605,23 +535,14 @@ public: unsigned getMinInstAlignment() const { return MinInstAlignment; } bool getDollarIsPC() const { return DollarIsPC; } - bool getDotIsPC() const { return DotIsPC; } - bool getStarIsPC() const { return StarIsPC; } const char *getSeparatorString() const { return SeparatorString; } unsigned getCommentColumn() const { return CommentColumn; } void setCommentColumn(unsigned Col) { CommentColumn = Col; } StringRef getCommentString() const { return CommentString; } - bool getRestrictCommentStringToStartOfStatement() const { - return RestrictCommentStringToStartOfStatement; - } bool shouldAllowAdditionalComments() const { return AllowAdditionalComments; } - bool getEmitGNUAsmStartIndentationMarker() const { - return EmitGNUAsmStartIndentationMarker; - } const char *getLabelSuffix() const { return LabelSuffix; } - bool shouldEmitLabelsInUpperCase() const { return EmitLabelsInUpperCase; } bool useAssignmentForEHBegin() const { return UseAssignmentForEHBegin; } bool needsLocalForSize() const { return NeedsLocalForSize; } @@ -655,32 +576,20 @@ public: bool doesAllowDollarAtStartOfIdentifier() const { return AllowDollarAtStartOfIdentifier; } - bool doesAllowHashAtStartOfIdentifier() const { - return AllowHashAtStartOfIdentifier; - } bool supportsNameQuoting() const { return SupportsQuotedNames; } bool doesSupportDataRegionDirectives() const { return UseDataRegionDirectives; } - bool useDotAlignForAlignment() const { - return UseDotAlignForAlignment; - } - bool hasLEB128Directives() const { return HasLEB128Directives; } bool useFullRegisterNames() const { return PPCUseFullRegisterNames; } void setFullRegisterNames(bool V) { PPCUseFullRegisterNames = V; } const char *getZeroDirective() const { return ZeroDirective; } - bool doesZeroDirectiveSupportNonZeroValue() const { - return ZeroDirectiveSupportsNonZeroValue; - } const char *getAsciiDirective() const { return AsciiDirective; } 
const char *getAscizDirective() const { return AscizDirective; } - const char *getByteListDirective() const { return ByteListDirective; } - const char *getPlainStringDirective() const { return PlainStringDirective; } AsmCharLiteralSyntax characterLiteralSyntax() const { return CharacterLiteralSyntax; } @@ -700,16 +609,9 @@ public: return LCOMMDirectiveAlignmentType; } - bool hasBasenameOnlyForFileDirective() const { - return HasBasenameOnlyForFileDirective; - } - bool hasPairedDoubleQuoteStringConstants() const { - return HasPairedDoubleQuoteStringConstants; - } bool hasFunctionAlignment() const { return HasFunctionAlignment; } bool hasDotTypeDotSizeDirective() const { return HasDotTypeDotSizeDirective; } bool hasSingleParameterDotFile() const { return HasSingleParameterDotFile; } - bool hasFourStringsDotFile() const { return HasFourStringsDotFile; } bool hasIdentDirective() const { return HasIdentDirective; } bool hasNoDeadStrip() const { return HasNoDeadStrip; } const char *getWeakDirective() const { return WeakDirective; } @@ -776,13 +678,7 @@ public: return SupportsExtendedDwarfLocDirective; } - bool usesDwarfFileAndLocDirectives() const { - return UsesDwarfFileAndLocDirectives; - } - - bool needsDwarfSectionSizeInHeader() const { - return DwarfSectionSizeRequired; - } + bool usesDwarfFileAndLocDirectives() const { return !IsAIX; } bool enableDwarfFileDirectoryDefault() const { return EnableDwarfFileDirectoryDefault; @@ -832,7 +728,6 @@ public: bool shouldUseLogicalShr() const { return UseLogicalShr; } bool hasMipsExpressions() const { return HasMipsExpressions; } - bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } bool shouldUseMotorolaIntegers() const { return UseMotorolaIntegers; } }; diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h index 30923274ea72..2a57e04b2c88 100644 --- a/llvm/include/llvm/MC/MCCodeView.h +++ b/llvm/include/llvm/MC/MCCodeView.h @@ -144,11 +144,12 @@ struct MCCVFunctionInfo { 
class CodeViewContext { public: CodeViewContext(MCContext *MCCtx) : MCCtx(MCCtx) {} - ~CodeViewContext(); CodeViewContext &operator=(const CodeViewContext &other) = delete; CodeViewContext(const CodeViewContext &other) = delete; + void finish(); + bool isValidFileNumber(unsigned FileNumber) const; bool addFile(MCStreamer &OS, unsigned FileNumber, StringRef Filename, ArrayRef<uint8_t> ChecksumBytes, uint8_t ChecksumKind); @@ -230,9 +231,7 @@ private: /// The fragment that ultimately holds our strings. MCDataFragment *StrTabFragment = nullptr; - bool InsertedStrTabFragment = false; - - MCDataFragment *getStringTableFragment(); + SmallVector<char, 0> StrTab = {'\0'}; /// Get a string table offset. unsigned getStringTableOffset(StringRef S); diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 87465bcfa5d6..b0a39ffacccc 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -190,22 +190,16 @@ protected: : MCEncodedFragment(FType, HasInstructions) {} public: - - using const_fixup_iterator = SmallVectorImpl<MCFixup>::const_iterator; - using fixup_iterator = SmallVectorImpl<MCFixup>::iterator; - SmallVectorImpl<char> &getContents() { return Contents; } const SmallVectorImpl<char> &getContents() const { return Contents; } + void appendContents(ArrayRef<char> C) { Contents.append(C.begin(), C.end()); } + void appendContents(size_t Num, char Elt) { Contents.append(Num, Elt); } + void setContents(ArrayRef<char> C) { Contents.assign(C.begin(), C.end()); } + SmallVectorImpl<MCFixup> &getFixups() { return Fixups; } const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; } - fixup_iterator fixup_begin() { return Fixups.begin(); } - const_fixup_iterator fixup_begin() const { return Fixups.begin(); } - - fixup_iterator fixup_end() { return Fixups.end(); } - const_fixup_iterator fixup_end() const { return Fixups.end(); } - static bool classof(const MCFragment *F) { MCFragment::FragmentType Kind = 
F->getKind(); return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data || diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index dcdcd094fa17..be50fd2bd21d 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -181,7 +181,6 @@ public: FragList *curFragList() const { return CurFragList; } iterator begin() const { return iterator(CurFragList->Head); } iterator end() const { return {}; } - bool empty() const { return !CurFragList->Head; } void dump() const; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index cfb31a762184..21da4dac4872 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -424,7 +424,7 @@ public: /// Calls changeSection as needed. /// /// Returns false if the stack was empty. - bool popSection(); + virtual bool popSection(); /// Set the current section where code is being emitted to \p Section. This /// is required to update CurSection. @@ -1137,9 +1137,6 @@ public: const MCSymbol *LastLabel, const MCSymbol *Label, unsigned PointerSize) {} - - /// Do finalization for the streamer at the end of a section. - virtual void doFinalizationAtSectionEnd(MCSection *Section) {} }; /// Create a dummy machine code streamer, which does nothing. This is useful for diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h index 9891f1d127f1..535bcfe2fb6d 100644 --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -77,6 +77,7 @@ class MCSubtargetInfo { Triple TargetTriple; std::string CPU; // CPU being targeted. std::string TuneCPU; // CPU being tuned for. 
+ ArrayRef<StringRef> ProcNames; // Processor list, including aliases ArrayRef<SubtargetFeatureKV> ProcFeatures; // Processor feature list ArrayRef<SubtargetSubTypeKV> ProcDesc; // Processor descriptions @@ -95,7 +96,8 @@ class MCSubtargetInfo { public: MCSubtargetInfo(const MCSubtargetInfo &) = default; MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, - StringRef FS, ArrayRef<SubtargetFeatureKV> PF, + StringRef FS, ArrayRef<StringRef> PN, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, diff --git a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h index 968d938a6549..04172286ca77 100644 --- a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h +++ b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h @@ -40,11 +40,16 @@ private: }; class XCOFFObjectWriter : public MCObjectWriter { + // AIX specific CPU type. + std::string CPUType; + public: virtual void addExceptionEntry(const MCSymbol *Symbol, const MCSymbol *Trap, unsigned LanguageCode, unsigned ReasonCode, unsigned FunctionSize, bool hasDebug) = 0; virtual void addCInfoSymEntry(StringRef Name, StringRef Metadata) = 0; + StringRef getCPUType() const { return CPUType; } + void setCPU(StringRef TargetCPU) { CPUType = TargetCPU; } }; std::unique_ptr<MCObjectWriter> diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 8fabc78d81ae..decb6cb5455e 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -53,10 +53,8 @@ class OptTable { public: /// Entry for a single option instance in the option data table. struct Info { - /// A null terminated array of prefix strings to apply to name while - /// matching. 
- ArrayRef<StringLiteral> Prefixes; - StringLiteral PrefixedName; + unsigned PrefixesOffset; + unsigned PrefixedNameOffset; const char *HelpText; // Help text for specific visibilities. A list of pairs, where each pair // is a list of visibilities and a specific help string for those @@ -80,15 +78,56 @@ public: const char *AliasArgs; const char *Values; - StringRef getName() const { - unsigned PrefixLength = Prefixes.empty() ? 0 : Prefixes[0].size(); - return PrefixedName.drop_front(PrefixLength); + bool hasNoPrefix() const { return PrefixesOffset == 0; } + + unsigned getNumPrefixes(ArrayRef<unsigned> PrefixesTable) const { + return PrefixesTable[PrefixesOffset]; + } + + ArrayRef<unsigned> + getPrefixOffsets(ArrayRef<unsigned> PrefixesTable) const { + return hasNoPrefix() ? ArrayRef<unsigned>() + : PrefixesTable.slice(PrefixesOffset + 1, + getNumPrefixes(PrefixesTable)); + } + + void appendPrefixes(const char *StrTable, ArrayRef<unsigned> PrefixesTable, + SmallVectorImpl<StringRef> &Prefixes) const { + for (unsigned PrefixOffset : getPrefixOffsets(PrefixesTable)) + Prefixes.push_back(&StrTable[PrefixOffset]); + } + + StringRef getPrefix(const char *StrTable, ArrayRef<unsigned> PrefixesTable, + unsigned PrefixIndex) const { + return &StrTable[getPrefixOffsets(PrefixesTable)[PrefixIndex]]; + } + + StringRef getPrefixedName(const char *StrTable) const { + return &StrTable[PrefixedNameOffset]; + } + + StringRef getName(const char *StrTable, + ArrayRef<unsigned> PrefixesTable) const { + unsigned PrefixLength = + hasNoPrefix() ? 0 : getPrefix(StrTable, PrefixesTable, 0).size(); + return getPrefixedName(StrTable).drop_front(PrefixLength); } }; private: + // A unified string table for these options. Individual strings are stored as + // null terminated C-strings at offsets within this table. + const char *StrTable; + + // A table of different sets of prefixes. 
Each set starts with the number of + // prefixes in that set followed by that many offsets into the string table + // for each of the prefix strings. This is essentially a Pascal-string style + // encoding. + ArrayRef<unsigned> PrefixesTable; + /// The option information table. ArrayRef<Info> OptionInfos; + bool IgnoreCase; bool GroupedShortOptions = false; bool DashDashParsing = false; @@ -102,12 +141,12 @@ protected: /// special option like 'input' or 'unknown', and is not an option group). unsigned FirstSearchableIndex = 0; - /// The union of the first element of all option prefixes. - SmallString<8> PrefixChars; - /// The union of all option prefixes. If an argument does not begin with /// one of these, it is an input. - virtual ArrayRef<StringLiteral> getPrefixesUnion() const = 0; + SmallVector<StringRef> PrefixesUnion; + + /// The union of the first element of all option prefixes. + SmallString<8> PrefixChars; private: const Info &getInfo(OptSpecifier Opt) const { @@ -122,7 +161,8 @@ private: protected: /// Initialize OptTable using Tablegen'ed OptionInfos. Child class must /// manually call \c buildPrefixChars once they are fully constructed. - OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false); + OptTable(const char *StrTable, ArrayRef<unsigned> PrefixesTable, + ArrayRef<Info> OptionInfos, bool IgnoreCase = false); /// Build (or rebuild) the PrefixChars member. void buildPrefixChars(); @@ -130,6 +170,12 @@ protected: public: virtual ~OptTable(); + /// Return the string table used for option names. + const char *getStrTable() const { return StrTable; } + + /// Return the prefixes table used for option names. + ArrayRef<unsigned> getPrefixesTable() const { return PrefixesTable; } + /// Return the total number of option classes. unsigned getNumOptions() const { return OptionInfos.size(); } @@ -141,7 +187,25 @@ public: /// Lookup the name of the given option. 
StringRef getOptionName(OptSpecifier id) const { - return getInfo(id).getName(); + return getInfo(id).getName(StrTable, PrefixesTable); + } + + /// Lookup the prefix of the given option. + StringRef getOptionPrefix(OptSpecifier id) const { + const Info &I = getInfo(id); + return I.hasNoPrefix() ? StringRef() + : I.getPrefix(StrTable, PrefixesTable, 0); + } + + void appendOptionPrefixes(OptSpecifier id, + SmallVectorImpl<StringRef> &Prefixes) const { + const Info &I = getInfo(id); + I.appendPrefixes(StrTable, PrefixesTable, Prefixes); + } + + /// Lookup the prefixed name of the given option. + StringRef getOptionPrefixedName(OptSpecifier id) const { + return getInfo(id).getPrefixedName(StrTable); } /// Get the kind of the given option. @@ -353,28 +417,22 @@ private: /// Specialization of OptTable class GenericOptTable : public OptTable { - SmallVector<StringLiteral> PrefixesUnionBuffer; - protected: - GenericOptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false); - ArrayRef<StringLiteral> getPrefixesUnion() const final { - return PrefixesUnionBuffer; - } + GenericOptTable(const char *StrTable, ArrayRef<unsigned> PrefixesTable, + ArrayRef<Info> OptionInfos, bool IgnoreCase = false); }; class PrecomputedOptTable : public OptTable { - ArrayRef<StringLiteral> PrefixesUnion; - protected: - PrecomputedOptTable(ArrayRef<Info> OptionInfos, - ArrayRef<StringLiteral> PrefixesTable, + PrecomputedOptTable(const char *StrTable, ArrayRef<unsigned> PrefixesTable, + ArrayRef<Info> OptionInfos, + ArrayRef<unsigned> PrefixesUnionOffsets, bool IgnoreCase = false) - : OptTable(OptionInfos, IgnoreCase), PrefixesUnion(PrefixesTable) { + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { + for (unsigned PrefixOffset : PrefixesUnionOffsets) + PrefixesUnion.push_back(&StrTable[PrefixOffset]); buildPrefixChars(); } - ArrayRef<StringLiteral> getPrefixesUnion() const final { - return PrefixesUnion; - } }; } // end namespace opt @@ -382,31 +440,35 @@ protected: } // end 
namespace llvm #define LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ ID_PREFIX##ID -#define LLVM_MAKE_OPT_ID(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ - LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUE) +#define LLVM_MAKE_OPT_ID(PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, \ + GROUP, ALIAS, ALIASARGS, FLAGS, VISIBILITY, PARAM, \ + HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, \ + ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, \ + HELPTEXTSFORVARIANTS, METAVAR, VALUE) #define LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ llvm::opt::OptTable::Info { \ - PREFIX, PREFIXED_NAME, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ - ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ VISIBILITY, ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ } -#define LLVM_CONSTRUCT_OPT_INFO(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, 
VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ +#define LLVM_CONSTRUCT_OPT_INFO( \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) + OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) #endif // LLVM_OPTION_OPTTABLE_H diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 5d16fbdb6b77..a0563da15c8e 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -100,7 +100,8 @@ public: /// Get the name of this option without any prefix. StringRef getName() const { assert(Info && "Must have a valid info!"); - return Info->getName(); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionName(Info->ID); } const Option getGroup() const { @@ -127,15 +128,16 @@ public: /// Get the default prefix for this option. StringRef getPrefix() const { - return Info->Prefixes.empty() - ? StringRef() - : static_cast<const StringRef &>(Info->Prefixes[0]); + assert(Info && "Must have a valid info!"); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefix(Info->ID); } /// Get the name of this option with the default prefix. - StringLiteral getPrefixedName() const { + StringRef getPrefixedName() const { assert(Info && "Must have a valid info!"); - return Info->PrefixedName; + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefixedName(Info->ID); } /// Get the help text for this option. 
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index d2e9e8185a2b..a84164bed46c 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -668,9 +668,6 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses( if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableLSR) { addPass(createFunctionToLoopPassAdaptor(LoopStrengthReducePass(), /*UseMemorySSA=*/true)); - // FIXME: use -stop-after so we could remove PrintLSR - if (Opt.PrintLSR) - addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } if (getOptLevel() != CodeGenOptLevel::None) { diff --git a/llvm/include/llvm/Passes/DroppedVariableStats.h b/llvm/include/llvm/Passes/DroppedVariableStats.h new file mode 100644 index 000000000000..c4de849ca755 --- /dev/null +++ b/llvm/include/llvm/Passes/DroppedVariableStats.h @@ -0,0 +1,225 @@ +///===- DroppedVariableStats.h - Opt Diagnostics -*- C++ -*----------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. +/// +///===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATS_H +#define LLVM_CODEGEN_DROPPEDVARIABLESTATS_H + +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassInstrumentation.h" + +namespace llvm { + +/// A unique key that represents a debug variable. +/// First const DIScope *: Represents the scope of the debug variable. 
+/// Second const DIScope *: Represents the InlinedAt scope of the debug +/// variable. const DILocalVariable *: It is a pointer to the debug variable +/// itself. +using VarID = + std::tuple<const DIScope *, const DIScope *, const DILocalVariable *>; + +/// A base class to collect and print dropped debug information variable +/// statistics. +class DroppedVariableStats { +public: + DroppedVariableStats(bool DroppedVarStatsEnabled) + : DroppedVariableStatsEnabled(DroppedVarStatsEnabled) { + if (DroppedVarStatsEnabled) + llvm::outs() + << "Pass Level, Pass Name, Num of Dropped Variables, Func or " + "Module Name\n"; + }; + + virtual ~DroppedVariableStats() {} + + // We intend this to be unique per-compilation, thus no copies. + DroppedVariableStats(const DroppedVariableStats &) = delete; + void operator=(const DroppedVariableStats &) = delete; + + bool getPassDroppedVariables() { return PassDroppedVariables; } + +protected: + void setup() { + DebugVariablesStack.push_back( + {DenseMap<const Function *, DebugVariables>()}); + InlinedAts.push_back( + {DenseMap<StringRef, DenseMap<VarID, DILocation *>>()}); + } + + void cleanup() { + assert(!DebugVariablesStack.empty() && + "DebugVariablesStack shouldn't be empty!"); + assert(!InlinedAts.empty() && "InlinedAts shouldn't be empty!"); + DebugVariablesStack.pop_back(); + InlinedAts.pop_back(); + } + + bool DroppedVariableStatsEnabled = false; + struct DebugVariables { + /// DenseSet of VarIDs before an optimization pass has run. + DenseSet<VarID> DebugVariablesBefore; + /// DenseSet of VarIDs after an optimization pass has run. + DenseSet<VarID> DebugVariablesAfter; + }; + +protected: + /// A stack of a DenseMap, that maps DebugVariables for every pass to an + /// llvm::Function. A stack is used because an optimization pass can call + /// other passes. + SmallVector<DenseMap<const Function *, DebugVariables>> DebugVariablesStack; + + /// A DenseSet tracking whether a scope was visited before. 
+ DenseSet<const DIScope *> VisitedScope; + /// A stack of DenseMaps, which map the name of an llvm::Function to a + /// DenseMap of VarIDs and their inlinedAt locations before an optimization + /// pass has run. + SmallVector<DenseMap<StringRef, DenseMap<VarID, DILocation *>>> InlinedAts; + /// Calculate the number of dropped variables in an llvm::Function or + /// llvm::MachineFunction and print the relevant information to stdout. + void calculateDroppedStatsAndPrint(DebugVariables &DbgVariables, + StringRef FuncName, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel, + const Function *Func) { + unsigned DroppedCount = 0; + DenseSet<VarID> &DebugVariablesBeforeSet = + DbgVariables.DebugVariablesBefore; + DenseSet<VarID> &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter; + DenseMap<VarID, DILocation *> &InlinedAtsMap = InlinedAts.back()[FuncName]; + // Find an Instruction that shares the same scope as the dropped #dbg_value + // or has a scope that is the child of the scope of the #dbg_value, and has + // an inlinedAt equal to the inlinedAt of the #dbg_value or its inlinedAt + // chain contains the inlinedAt of the #dbg_value, if such an Instruction is + // found, debug information is dropped. + for (VarID Var : DebugVariablesBeforeSet) { + if (DebugVariablesAfterSet.contains(Var)) + continue; + visitEveryInstruction(DroppedCount, InlinedAtsMap, Var); + removeVarFromAllSets(Var, Func); + } + if (DroppedCount > 0) { + llvm::outs() << PassLevel << ", " << PassID << ", " << DroppedCount + << ", " << FuncOrModName << "\n"; + PassDroppedVariables = true; + } else + PassDroppedVariables = false; + } + + /// Check if a \p Var has been dropped or is a false positive. Also update the + /// \p DroppedCount if a debug variable is dropped.
+ bool updateDroppedCount(DILocation *DbgLoc, const DIScope *Scope, + const DIScope *DbgValScope, + DenseMap<VarID, DILocation *> &InlinedAtsMap, + VarID Var, unsigned &DroppedCount) { + // If the Scope is a child of, or equal to the DbgValScope and is inlined at + // the Var's InlinedAt location, return true to signify that the Var has + // been dropped. + if (isScopeChildOfOrEqualTo(Scope, DbgValScope)) + if (isInlinedAtChildOfOrEqualTo(DbgLoc->getInlinedAt(), + InlinedAtsMap[Var])) { + // Found another instruction in the variable's scope, so there exists a + // break point at which the variable could be observed. Count it as + // dropped. + DroppedCount++; + return true; + } + return false; + } + /// Run code to populate relevant data structures over an llvm::Function or + /// llvm::MachineFunction. + void run(DebugVariables &DbgVariables, StringRef FuncName, bool Before) { + auto &VarIDSet = (Before ? DbgVariables.DebugVariablesBefore + : DbgVariables.DebugVariablesAfter); + auto &InlinedAtsMap = InlinedAts.back(); + if (Before) + InlinedAtsMap.try_emplace(FuncName, DenseMap<VarID, DILocation *>()); + VarIDSet = DenseSet<VarID>(); + visitEveryDebugRecord(VarIDSet, InlinedAtsMap, FuncName, Before); + } + /// Populate the VarIDSet and InlinedAtMap with the relevant information + /// needed for before and after pass analysis to determine dropped variable + /// status. + void populateVarIDSetAndInlinedMap( + const DILocalVariable *DbgVar, DebugLoc DbgLoc, DenseSet<VarID> &VarIDSet, + DenseMap<StringRef, DenseMap<VarID, DILocation *>> &InlinedAtsMap, + StringRef FuncName, bool Before) { + VarID Key{DbgVar->getScope(), DbgLoc->getInlinedAtScope(), DbgVar}; + VarIDSet.insert(Key); + if (Before) + InlinedAtsMap[FuncName].try_emplace(Key, DbgLoc.getInlinedAt()); + } + /// Visit every llvm::Instruction or llvm::MachineInstruction and check if the + /// debug variable denoted by its ID \p Var may have been dropped by an + /// optimization pass. 
+ virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap<VarID, DILocation *> &InlinedAtsMap, + VarID Var) = 0; + /// Visit every debug record in an llvm::Function or llvm::MachineFunction + /// and call populateVarIDSetAndInlinedMap on it. + virtual void visitEveryDebugRecord( + DenseSet<VarID> &VarIDSet, + DenseMap<StringRef, DenseMap<VarID, DILocation *>> &InlinedAtsMap, + StringRef FuncName, bool Before) = 0; + +private: + /// Remove a dropped debug variable's VarID from every DebugVariablesBefore + /// set in DebugVariablesStack. + void removeVarFromAllSets(VarID Var, const Function *F) { + // Do not remove Var from the last element, it will be popped from the + // stack. + for (auto &DebugVariablesMap : llvm::drop_end(DebugVariablesStack)) + DebugVariablesMap[F].DebugVariablesBefore.erase(Var); + } + /// Return true if \p Scope is the same as \p DbgValScope or a child scope of + /// \p DbgValScope, return false otherwise. + bool isScopeChildOfOrEqualTo(const DIScope *Scope, + const DIScope *DbgValScope) { + while (Scope != nullptr) { + if (VisitedScope.find(Scope) == VisitedScope.end()) { + VisitedScope.insert(Scope); + if (Scope == DbgValScope) { + VisitedScope.clear(); + return true; + } + Scope = Scope->getScope(); + } else { + VisitedScope.clear(); + return false; + } + } + return false; + } + /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of + /// the InlinedAt chain, return false otherwise.
+ bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, + const DILocation *DbgValInlinedAt) { + if (DbgValInlinedAt == InlinedAt) + return true; + if (!DbgValInlinedAt) + return false; + auto *IA = InlinedAt; + while (IA) { + if (IA == DbgValInlinedAt) + return true; + IA = IA->getInlinedAt(); + } + return false; + } + bool PassDroppedVariables = false; +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Passes/DroppedVariableStatsIR.h b/llvm/include/llvm/Passes/DroppedVariableStatsIR.h new file mode 100644 index 000000000000..99701e8c8e1c --- /dev/null +++ b/llvm/include/llvm/Passes/DroppedVariableStatsIR.h @@ -0,0 +1,101 @@ +///===- DroppedVariableStatsIR.h - Opt Diagnostics -*- C++ -*--------------===// +/// +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM +/// Exceptions. See https://llvm.org/LICENSE.txt for license information. +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/// +///===---------------------------------------------------------------------===// +/// \file +/// Dropped Variable Statistics for Debug Information. Reports any number +/// of #dbg_value that get dropped due to an optimization pass. +/// +///===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DROPPEDVARIABLESTATSIR_H +#define LLVM_CODEGEN_DROPPEDVARIABLESTATSIR_H + +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/Passes/DroppedVariableStats.h" + +namespace llvm { + +/// A class to collect and print dropped debug information due to LLVM IR +/// optimization passes. After every LLVM IR pass is run, it will print how many +/// #dbg_values were dropped due to that pass. 
+class DroppedVariableStatsIR : public DroppedVariableStats { +public: + DroppedVariableStatsIR(bool DroppedVarStatsEnabled) + : llvm::DroppedVariableStats(DroppedVarStatsEnabled) {} + + void runBeforePass(Any IR) { + setup(); + if (const auto *M = unwrapIR<Module>(IR)) + return this->runOnModule(M, true); + if (const auto *F = unwrapIR<Function>(IR)) + return this->runOnFunction(F, true); + } + + void runAfterPass(StringRef P, Any IR) { + if (const auto *M = unwrapIR<Module>(IR)) + runAfterPassModule(P, M); + else if (const auto *F = unwrapIR<Function>(IR)) + runAfterPassFunction(P, F); + cleanup(); + } + + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +private: + const Function *Func; + + void runAfterPassFunction(StringRef PassID, const Function *F) { + runOnFunction(F, false); + calculateDroppedVarStatsOnFunction(F, PassID, F->getName().str(), + "Function"); + } + + void runAfterPassModule(StringRef PassID, const Module *M) { + runOnModule(M, false); + calculateDroppedVarStatsOnModule(M, PassID, M->getName().str(), "Module"); + } + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Function. + void runOnFunction(const Function *F, bool Before); + /// Iterate over all Instructions in a Function and report any dropped debug + /// information. + void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or + /// after a pass has run to facilitate dropped variable calculation for an + /// llvm::Module. Calls runOnFunction on every Function in the Module. + void runOnModule(const Module *M, bool Before); + /// Iterate over all Functions in a Module and report any dropped debug + /// information. Will call calculateDroppedVarStatsOnFunction on every + /// Function. 
+ void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, + StringRef FuncOrModName, + StringRef PassLevel); + /// Override base class method to run on an llvm::Function specifically. + virtual void + visitEveryInstruction(unsigned &DroppedCount, + DenseMap<VarID, DILocation *> &InlinedAtsMap, + VarID Var) override; + + /// Override base class method to run on #dbg_values specifically. + virtual void visitEveryDebugRecord( + DenseSet<VarID> &VarIDSet, + DenseMap<StringRef, DenseMap<VarID, DILocation *>> &InlinedAtsMap, + StringRef FuncName, bool Before) override; + + template <typename IRUnitT> static const IRUnitT *unwrapIR(Any IR) { + const IRUnitT **IRPtr = llvm::any_cast<const IRUnitT *>(&IR); + return IRPtr ? *IRPtr : nullptr; + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index e65bd58dae96..29763995e8b5 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -101,6 +101,7 @@ MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis()) MACHINE_FUNCTION_ANALYSIS("livedebugvars", LiveDebugVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) +MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-block-freq", MachineBlockFrequencyAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-branch-prob", @@ -116,7 +117,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PI MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) MACHINE_FUNCTION_ANALYSIS("spill-code-placement", SpillPlacementAnalysis()) MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass()) // 
MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", // LazyMachineBlockFrequencyInfoAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis()) @@ -149,6 +149,7 @@ MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("print", PrintMIRPass()) MACHINE_FUNCTION_PASS("print<livedebugvars>", LiveDebugVariablesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print<live-intervals>", LiveIntervalsPrinterPass(errs())) +MACHINE_FUNCTION_PASS("print<live-stacks>", LiveStacksPrinterPass(errs())) MACHINE_FUNCTION_PASS("print<live-vars>", LiveVariablesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print<machine-block-freq>", MachineBlockFrequencyPrinterPass(errs())) @@ -178,7 +179,7 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi PARAMS) #endif MACHINE_FUNCTION_PASS_WITH_PARAMS( - "regallocfast", "RegAllocFast", + "regallocfast", "RegAllocFastPass", [](RegAllocFastPassOptions Opts) { return RegAllocFastPass(Opts); }, [PB = this](StringRef Params) { return parseRegAllocFastPassOptions(*PB, Params); diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9301a12c740e..4e62ee9c00da 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -25,6 +25,7 @@ #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Passes/DroppedVariableStatsIR.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/IPO/SampleProfileProbe.h" @@ -579,83 +580,6 @@ private: static void SignalHandler(void *); }; -/// A class to collect and print dropped debug information variable statistics. -/// After every LLVM IR pass is run, it will print how many #dbg_values were -/// dropped due to that pass. 
-class DroppedVariableStats { -public: - DroppedVariableStats(bool DroppedVarStatsEnabled) { - if (DroppedVarStatsEnabled) - llvm::outs() - << "Pass Level, Pass Name, Num of Dropped Variables, Func or " - "Module Name\n"; - }; - // We intend this to be unique per-compilation, thus no copies. - DroppedVariableStats(const DroppedVariableStats &) = delete; - void operator=(const DroppedVariableStats &) = delete; - - void registerCallbacks(PassInstrumentationCallbacks &PIC); - void runBeforePass(StringRef PassID, Any IR); - void runAfterPass(StringRef PassID, Any IR, const PreservedAnalyses &PA); - void runAfterPassInvalidated(StringRef PassID, const PreservedAnalyses &PA); - bool getPassDroppedVariables() { return PassDroppedVariables; } - -private: - bool PassDroppedVariables = false; - /// A unique key that represents a #dbg_value. - using VarID = - std::tuple<const DIScope *, const DIScope *, const DILocalVariable *>; - - struct DebugVariables { - /// DenseSet of VarIDs before an optimization pass has run. - DenseSet<VarID> DebugVariablesBefore; - /// DenseSet of VarIDs after an optimization pass has run. - DenseSet<VarID> DebugVariablesAfter; - }; - - /// A stack of a DenseMap, that maps DebugVariables for every pass to an - /// llvm::Function. A stack is used because an optimization pass can call - /// other passes. - SmallVector<DenseMap<const Function *, DebugVariables>> DebugVariablesStack; - - /// A DenseSet tracking whether a scope was visited before. - DenseSet<const DIScope *> VisitedScope; - /// A stack of DenseMaps, which map the name of an llvm::Function to a - /// DenseMap of VarIDs and their inlinedAt locations before an optimization - /// pass has run. - SmallVector<DenseMap<StringRef, DenseMap<VarID, DILocation *>>> InlinedAts; - - /// Iterate over all Functions in a Module and report any dropped debug - /// information. Will call calculateDroppedVarStatsOnFunction on every - /// Function. 
- void calculateDroppedVarStatsOnModule(const Module *M, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Iterate over all Instructions in a Function and report any dropped debug - /// information. - void calculateDroppedVarStatsOnFunction(const Function *F, StringRef PassID, - std::string FuncOrModName, - std::string PassLevel); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Function. - void runOnFunction(const Function *F, bool Before); - /// Populate DebugVariablesBefore, DebugVariablesAfter, InlinedAts before or - /// after a pass has run to facilitate dropped variable calculation for an - /// llvm::Module. Calls runOnFunction on every Function in the Module. - void runOnModule(const Module *M, bool Before); - /// Remove a dropped #dbg_value VarID from all Sets in the - /// DroppedVariablesBefore stack. - void removeVarFromAllSets(VarID Var, const Function *F); - /// Return true if \p Scope is the same as \p DbgValScope or a child scope of - /// \p DbgValScope, return false otherwise. - bool isScopeChildOfOrEqualTo(DIScope *Scope, const DIScope *DbgValScope); - /// Return true if \p InlinedAt is the same as \p DbgValInlinedAt or part of - /// the InlinedAt chain, return false otherwise. - bool isInlinedAtChildOfOrEqualTo(const DILocation *InlinedAt, - const DILocation *DbgValInlinedAt); -}; - /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { @@ -673,7 +597,7 @@ class StandardInstrumentations { PrintCrashIRInstrumentation PrintCrashIR; IRChangedTester ChangeTester; VerifyInstrumentation Verify; - DroppedVariableStats DroppedStats; + DroppedVariableStatsIR DroppedStatsIR; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 590185d42e72..3a018d2a95c6 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -272,6 +272,10 @@ struct CounterMappingRegion { RegionKind Kind; + bool isBranch() const { + return (Kind == BranchRegion || Kind == MCDCBranchRegion); + } + CounterMappingRegion(Counter Count, unsigned FileID, unsigned ExpandedFileID, unsigned LineStart, unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd, RegionKind Kind) @@ -360,19 +364,16 @@ struct CountedRegion : public CounterMappingRegion { uint64_t FalseExecutionCount; bool TrueFolded; bool FalseFolded; - bool HasSingleByteCoverage; - CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount, - bool HasSingleByteCoverage) + CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount) : CounterMappingRegion(R), ExecutionCount(ExecutionCount), - FalseExecutionCount(0), TrueFolded(false), FalseFolded(true), - HasSingleByteCoverage(HasSingleByteCoverage) {} + FalseExecutionCount(0), TrueFolded(false), FalseFolded(true) {} CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount, - uint64_t FalseExecutionCount, bool HasSingleByteCoverage) + uint64_t FalseExecutionCount) : CounterMappingRegion(R), ExecutionCount(ExecutionCount), FalseExecutionCount(FalseExecutionCount), TrueFolded(false), - FalseFolded(false), HasSingleByteCoverage(HasSingleByteCoverage) {} + FalseFolded(false) {} }; /// MCDC Record grouping all information together. 
@@ -438,7 +439,7 @@ struct MCDCRecord { }; using TestVectors = llvm::SmallVector<std::pair<TestVector, CondState>>; - using BoolVector = llvm::SmallVector<bool>; + using BoolVector = std::array<BitVector, 2>; using TVRowPair = std::pair<unsigned, unsigned>; using TVPairMap = llvm::DenseMap<unsigned, TVRowPair>; using CondIDMap = llvm::DenseMap<unsigned, unsigned>; @@ -466,7 +467,9 @@ public: return Region.getDecisionParams().NumConditions; } unsigned getNumTestVectors() const { return TV.size(); } - bool isCondFolded(unsigned Condition) const { return Folded[Condition]; } + bool isCondFolded(unsigned Condition) const { + return Folded[false][Condition] || Folded[true][Condition]; + } /// Return the evaluation of a condition (indicated by Condition) in an /// executed test vector (indicated by TestVectorIndex), which will be True, @@ -715,11 +718,9 @@ struct FunctionRecord { } void pushRegion(CounterMappingRegion Region, uint64_t Count, - uint64_t FalseCount, bool HasSingleByteCoverage) { - if (Region.Kind == CounterMappingRegion::BranchRegion || - Region.Kind == CounterMappingRegion::MCDCBranchRegion) { - CountedBranchRegions.emplace_back(Region, Count, FalseCount, - HasSingleByteCoverage); + uint64_t FalseCount) { + if (Region.isBranch()) { + CountedBranchRegions.emplace_back(Region, Count, FalseCount); // If either counter is hard-coded to zero, then this region represents a // constant-folded branch. 
CountedBranchRegions.back().TrueFolded = Region.Count.isZero(); @@ -728,8 +729,7 @@ struct FunctionRecord { } if (CountedRegions.empty()) ExecutionCount = Count; - CountedRegions.emplace_back(Region, Count, FalseCount, - HasSingleByteCoverage); + CountedRegions.emplace_back(Region, Count, FalseCount); } }; @@ -892,14 +892,19 @@ class CoverageData { std::vector<CountedRegion> BranchRegions; std::vector<MCDCRecord> MCDCRecords; + bool SingleByteCoverage = false; + public: CoverageData() = default; - CoverageData(StringRef Filename) : Filename(Filename) {} + CoverageData(bool Single, StringRef Filename) + : Filename(Filename), SingleByteCoverage(Single) {} /// Get the name of the file this data covers. StringRef getFilename() const { return Filename; } + bool getSingleByteCoverage() const { return SingleByteCoverage; } + /// Get an iterator over the coverage segments for this object. The segments /// are guaranteed to be uniqued and sorted by location. std::vector<CoverageSegment>::const_iterator begin() const { @@ -932,6 +937,8 @@ class CoverageMapping { DenseMap<size_t, SmallVector<unsigned, 0>> FilenameHash2RecordIndices; std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches; + std::optional<bool> SingleByteCoverage; + CoverageMapping() = default; // Load coverage records from readers. 
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 330cf540c099..f1010b312ee5 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -21,6 +21,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" @@ -716,6 +717,9 @@ public: DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> getMemProfCallerCalleePairs() const; + + // Return the entire MemProf profile. + memprof::AllMemProfData getAllMemProfData() const; }; /// Reader for the indexed binary instrprof format. @@ -823,6 +827,10 @@ public: return MemProfReader.getMemProfCallerCalleePairs(); } + memprof::AllMemProfData getAllMemProfData() const { + return MemProfReader.getAllMemProfData(); + } + /// Fill Counts with the profile data for the given function name. Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index fdb51c4ab421..67d85daa8162 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -115,21 +115,6 @@ public: void addTemporalProfileTraces(SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize); - /// Add a memprof record for a function identified by its \p Id. - void addMemProfRecord(const GlobalValue::GUID Id, - const memprof::IndexedMemProfRecord &Record); - - /// Add a memprof frame identified by the hash of the contents of the frame in - /// \p FrameId. 
- bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, - function_ref<void(Error)> Warn); - - /// Add a call stack identified by the hash of the contents of the call stack - /// in \p CallStack. - bool addMemProfCallStack(const memprof::CallStackId CSId, - const llvm::SmallVector<memprof::FrameId> &CallStack, - function_ref<void(Error)> Warn); - /// Add the entire MemProfData \p Incoming to the writer context. bool addMemProfData(memprof::IndexedMemProfData Incoming, function_ref<void(Error)> Warn); @@ -229,6 +214,21 @@ private: /// Add \p Trace using reservoir sampling. void addTemporalProfileTrace(TemporalProfTraceTy Trace); + /// Add a memprof record for a function identified by its \p Id. + void addMemProfRecord(const GlobalValue::GUID Id, + const memprof::IndexedMemProfRecord &Record); + + /// Add a memprof frame identified by the hash of the contents of the frame in + /// \p FrameId. + bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, + function_ref<void(Error)> Warn); + + /// Add a call stack identified by the hash of the contents of the call stack + /// in \p CallStack. + bool addMemProfCallStack(const memprof::CallStackId CSId, + const llvm::SmallVector<memprof::FrameId> &CallStack, + function_ref<void(Error)> Warn); + Error writeImpl(ProfOStream &OS); // Writes known header fields and reserves space for fields whose value are @@ -237,6 +237,9 @@ private: uint64_t writeHeader(const IndexedInstrProf::Header &header, const bool WritePrevVersion, ProfOStream &OS); + // Writes binary IDs. + Error writeBinaryIds(ProfOStream &OS); + // Writes compressed vtable names to profiles. 
Error writeVTableNames(ProfOStream &OS); }; diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 9aaa2af335a2..da0cb47508e3 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -12,7 +12,6 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/HashBuilder.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include <bitset> @@ -324,21 +323,6 @@ struct Frame { << " Column: " << Column << "\n" << " Inline: " << IsInlineFrame << "\n"; } - - // Return a hash value based on the contents of the frame. Here we use a - // cryptographic hash function to minimize the chance of hash collisions. We - // do persist FrameIds as part of memprof formats up to Version 2, inclusive. - // However, the deserializer never calls this function; it uses FrameIds - // merely as keys to look up Frames proper. - inline FrameId hash() const { - llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> - HashBuilder; - HashBuilder.add(Function, LineOffset, Column, IsInlineFrame); - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); - FrameId Id; - std::memcpy(&Id, Hash.data(), sizeof(Hash)); - return Id; - } }; // A type representing the index into the table of call stacks. @@ -491,19 +475,6 @@ struct MemProfRecord { } }; -// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields -// within MemProfRecord at the same level as if the GUID were part of -// MemProfRecord. -struct GUIDMemProfRecordPair { - GlobalValue::GUID GUID; - MemProfRecord Record; -}; - -// The top-level data structure, only used with YAML for now. -struct AllMemProfData { - std::vector<GUIDMemProfRecordPair> HeapProfileRecords; -}; - // Reads a memprof schema from a buffer. All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of // ids in the schema. 
Subsequent entries are integers which map to memprof::Meta @@ -789,9 +760,6 @@ public: } }; -// Compute a CallStackId for a given call stack. -CallStackId hashCallStack(ArrayRef<FrameId> CS); - namespace detail { // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have // to do so in one of two different ways depending on the type of the hash @@ -1025,10 +993,62 @@ struct IndexedMemProfData { llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks; FrameId addFrame(const Frame &F) { - const FrameId Id = F.hash(); + const FrameId Id = hashFrame(F); Frames.try_emplace(Id, F); return Id; } + + CallStackId addCallStack(ArrayRef<FrameId> CS) { + CallStackId CSId = hashCallStack(CS); + CallStacks.try_emplace(CSId, CS); + return CSId; + } + + CallStackId addCallStack(SmallVector<FrameId> &&CS) { + CallStackId CSId = hashCallStack(CS); + CallStacks.try_emplace(CSId, std::move(CS)); + return CSId; + } + +private: + // Return a hash value based on the contents of the frame. Here we use a + // cryptographic hash function to minimize the chance of hash collisions. We + // do persist FrameIds as part of memprof formats up to Version 2, inclusive. + // However, the deserializer never calls this function; it uses FrameIds + // merely as keys to look up Frames proper. + FrameId hashFrame(const Frame &F) const { + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> + HashBuilder; + HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + FrameId Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; + } + + // Compute a CallStackId for a given call stack. + CallStackId hashCallStack(ArrayRef<FrameId> CS) const; +}; + +// A convenience wrapper around FrameIdConverter and CallStackIdConverter for +// tests. 
+struct IndexedCallstackIdConveter { + IndexedCallstackIdConveter() = delete; + IndexedCallstackIdConveter(IndexedMemProfData &MemProfData) + : FrameIdConv(MemProfData.Frames), + CSIdConv(MemProfData.CallStacks, FrameIdConv) {} + + // Delete the copy constructor and copy assignment operator to avoid a + // situation where a copy of IndexedCallStackIdConverter gets an error in + // LastUnmappedId while the original instance doesn't. + IndexedCallstackIdConveter(const IndexedCallstackIdConveter &) = delete; + IndexedCallstackIdConveter & + operator=(const IndexedCallstackIdConveter &) = delete; + + std::vector<Frame> operator()(CallStackId CSId) { return CSIdConv(CSId); } + + FrameIdConverter<decltype(IndexedMemProfData::Frames)> FrameIdConv; + CallStackIdConverter<decltype(IndexedMemProfData::CallStacks)> CSIdConv; }; struct FrameStat { @@ -1152,95 +1172,6 @@ public: } }; } // namespace memprof - -namespace yaml { -template <> struct MappingTraits<memprof::Frame> { - static void mapping(IO &Io, memprof::Frame &F) { - Io.mapRequired("Function", F.Function); - Io.mapRequired("LineOffset", F.LineOffset); - Io.mapRequired("Column", F.Column); - Io.mapRequired("Inline", F.IsInlineFrame); - - // Assert that the definition of Frame matches what we expect. The - // structured bindings below detect changes to the number of fields. - // static_assert checks the type of each field. - const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; - static_assert( - std::is_same_v<remove_cvref_t<decltype(Function)>, GlobalValue::GUID>); - static_assert(std::is_same_v<remove_cvref_t<decltype(SymbolName)>, - std::unique_ptr<std::string>>); - static_assert( - std::is_same_v<remove_cvref_t<decltype(LineOffset)>, uint32_t>); - static_assert(std::is_same_v<remove_cvref_t<decltype(Column)>, uint32_t>); - static_assert( - std::is_same_v<remove_cvref_t<decltype(IsInlineFrame)>, bool>); - - // MSVC issues unused variable warnings despite the uses in static_assert - // above. 
- (void)Function; - (void)SymbolName; - (void)LineOffset; - (void)Column; - (void)IsInlineFrame; - } -}; - -template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> { - static void inputOne(IO &Io, StringRef KeyStr, - memprof::PortableMemInfoBlock &MIB) { - // PortableMemInfoBlock keeps track of the set of fields that actually have - // values. We update the set here as we receive a key-value pair from the - // YAML document. - // - // We set MIB.Name via a temporary variable because ScalarTraits<uintptr_t> - // isn't available on macOS. -#define MIBEntryDef(NameTag, Name, Type) \ - if (KeyStr == #Name) { \ - uint64_t Value; \ - Io.mapRequired(KeyStr.str().c_str(), Value); \ - MIB.Name = static_cast<Type>(Value); \ - MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ - return; \ - } -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - Io.setError("Key is not a valid validation event"); - } - - static void output(IO &Io, memprof::PortableMemInfoBlock &VI) { - llvm_unreachable("To be implemented"); - } -}; - -template <> struct MappingTraits<memprof::AllocationInfo> { - static void mapping(IO &Io, memprof::AllocationInfo &AI) { - Io.mapRequired("Callstack", AI.CallStack); - Io.mapRequired("MemInfoBlock", AI.Info); - } -}; - -// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can -// treat the GUID and the fields within MemProfRecord at the same level as if -// the GUID were part of MemProfRecord. 
-template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> { - static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { - Io.mapRequired("GUID", Pair.GUID); - Io.mapRequired("AllocSites", Pair.Record.AllocSites); - Io.mapRequired("CallSites", Pair.Record.CallSites); - } -}; - -template <> struct MappingTraits<memprof::AllMemProfData> { - static void mapping(IO &Io, memprof::AllMemProfData &Data) { - Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); - } -}; -} // namespace yaml } // namespace llvm -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) -LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) - #endif // LLVM_PROFILEDATA_MEMPROF_H_ diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 0529f7946064..29d9e57cae3e 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -213,6 +213,20 @@ private: class YAMLMemProfReader final : public MemProfReader { public: YAMLMemProfReader() = default; + + // Return true if the \p DataBuffer starts with "---" indicating it is a YAML + // file. + static bool hasFormat(const MemoryBuffer &DataBuffer); + // Wrapper around hasFormat above, reading the file instead of the memory + // buffer. + static bool hasFormat(const StringRef Path); + + // Create a YAMLMemProfReader after sanity checking the contents of the file + // at \p Path or the \p Buffer. 
+ static Expected<std::unique_ptr<YAMLMemProfReader>> create(const Twine &Path); + static Expected<std::unique_ptr<YAMLMemProfReader>> + create(std::unique_ptr<MemoryBuffer> Buffer); + void parse(StringRef YAMLData); }; } // namespace memprof diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h new file mode 100644 index 000000000000..fa1b7dd47384 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -0,0 +1,179 @@ +#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_ +#define LLVM_PROFILEDATA_MEMPROFYAML_H_ + +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace memprof { +// A "typedef" for GUID. See ScalarTraits<memprof::GUIDHex64> for how a GUID is +// serialized and deserialized in YAML. +LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) + +// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields +// within MemProfRecord at the same level as if the GUID were part of +// MemProfRecord. +struct GUIDMemProfRecordPair { + GUIDHex64 GUID; + MemProfRecord Record; +}; + +// The top-level data structure, only used with YAML for now. +struct AllMemProfData { + std::vector<GUIDMemProfRecordPair> HeapProfileRecords; +}; +} // namespace memprof + +namespace yaml { +template <> struct ScalarTraits<memprof::GUIDHex64> { + static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) { + // Print GUID as a 16-digit hexadecimal number. + Out << format("0x%016" PRIx64, (uint64_t)Val); + } + static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) { + // Reject decimal GUIDs. + if (all_of(Scalar, [](char C) { return std::isdigit(C); })) + return "use a hexadecimal GUID or a function instead"; + + uint64_t Num; + if (Scalar.starts_with_insensitive("0x")) { + // Accept hexadecimal numbers starting with 0x or 0X. 
+ if (Scalar.getAsInteger(0, Num)) + return "invalid hex64 number"; + Val = Num; + } else { + // Otherwise, treat the input as a string containing a function name. + Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + } + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> struct MappingTraits<memprof::Frame> { + // Essentially the same as memprof::Frame except that Function is of type + // memprof::GUIDHex64 instead of GlobalValue::GUID. This class helps in two + // ways. During serialization, we print Function as a 16-digit hexadecimal + // number. During deserialization, we accept a function name as an + // alternative to the usual GUID expressed as a hexadecimal number. + class FrameWithHex64 { + public: + FrameWithHex64(IO &) {} + FrameWithHex64(IO &, const memprof::Frame &F) + : Function(F.Function), LineOffset(F.LineOffset), Column(F.Column), + IsInlineFrame(F.IsInlineFrame) {} + memprof::Frame denormalize(IO &) { + return memprof::Frame(Function, LineOffset, Column, IsInlineFrame); + } + + memprof::GUIDHex64 Function = 0; + static_assert(std::is_same_v<decltype(Function.value), + decltype(memprof::Frame::Function)>); + decltype(memprof::Frame::LineOffset) LineOffset = 0; + decltype(memprof::Frame::Column) Column = 0; + decltype(memprof::Frame::IsInlineFrame) IsInlineFrame = false; + }; + + static void mapping(IO &Io, memprof::Frame &F) { + MappingNormalization<FrameWithHex64, memprof::Frame> Keys(Io, F); + + Io.mapRequired("Function", Keys->Function); + Io.mapRequired("LineOffset", Keys->LineOffset); + Io.mapRequired("Column", Keys->Column); + Io.mapRequired("IsInlineFrame", Keys->IsInlineFrame); + + // Assert that the definition of Frame matches what we expect. The + // structured bindings below detect changes to the number of fields. + // static_assert checks the type of each field. 
+ const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; + static_assert( + std::is_same_v<remove_cvref_t<decltype(Function)>, GlobalValue::GUID>); + static_assert(std::is_same_v<remove_cvref_t<decltype(SymbolName)>, + std::unique_ptr<std::string>>); + static_assert( + std::is_same_v<remove_cvref_t<decltype(LineOffset)>, uint32_t>); + static_assert(std::is_same_v<remove_cvref_t<decltype(Column)>, uint32_t>); + static_assert( + std::is_same_v<remove_cvref_t<decltype(IsInlineFrame)>, bool>); + + // MSVC issues unused variable warnings despite the uses in static_assert + // above. + (void)Function; + (void)SymbolName; + (void)LineOffset; + (void)Column; + (void)IsInlineFrame; + } + + // Request the inline notation for brevity: + // { Function: 123, LineOffset: 11, Column: 10, IsInlineFrame: true } + static const bool flow = true; +}; + +template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> { + static void inputOne(IO &Io, StringRef KeyStr, + memprof::PortableMemInfoBlock &MIB) { + // PortableMemInfoBlock keeps track of the set of fields that actually have + // values. We update the set here as we receive a key-value pair from the + // YAML document. + // + // We set MIB.Name via a temporary variable because ScalarTraits<uintptr_t> + // isn't available on macOS. 
+#define MIBEntryDef(NameTag, Name, Type) \ + if (KeyStr == #Name) { \ + uint64_t Value; \ + Io.mapRequired(KeyStr.str().c_str(), Value); \ + MIB.Name = static_cast<Type>(Value); \ + MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ + return; \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + Io.setError("Key is not a valid validation event"); + } + + static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) { + auto Schema = MIB.getSchema(); +#define MIBEntryDef(NameTag, Name, Type) \ + if (Schema.test(llvm::to_underlying(memprof::Meta::Name))) { \ + uint64_t Value = MIB.Name; \ + Io.mapRequired(#Name, Value); \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + } +}; + +template <> struct MappingTraits<memprof::AllocationInfo> { + static void mapping(IO &Io, memprof::AllocationInfo &AI) { + Io.mapRequired("Callstack", AI.CallStack); + Io.mapRequired("MemInfoBlock", AI.Info); + } +}; + +// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can +// treat the GUID and the fields within MemProfRecord at the same level as if +// the GUID were part of MemProfRecord. 
+template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> { + static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { + Io.mapRequired("GUID", Pair.GUID); + Io.mapRequired("AllocSites", Pair.Record.AllocSites); + Io.mapRequired("CallSites", Pair.Record.CallSites); + } +}; + +template <> struct MappingTraits<memprof::AllMemProfData> { + static void mapping(IO &Io, memprof::AllMemProfData &Data) { + Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); + } +}; +} // namespace yaml +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) +LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) + +#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_ diff --git a/llvm/include/llvm/SandboxIR/Instruction.h b/llvm/include/llvm/SandboxIR/Instruction.h index d9b0e9372581..4d21c4d3da35 100644 --- a/llvm/include/llvm/SandboxIR/Instruction.h +++ b/llvm/include/llvm/SandboxIR/Instruction.h @@ -1744,11 +1744,12 @@ public: class CatchSwitchInst : public SingleLLVMInstructionImpl<llvm::CatchSwitchInst> { -public: CatchSwitchInst(llvm::CatchSwitchInst *CSI, Context &Ctx) : SingleLLVMInstructionImpl(ClassID::CatchSwitch, Opcode::CatchSwitch, CSI, Ctx) {} + friend class Context; // For accessing the constructor in create*() +public: static CatchSwitchInst *create(Value *ParentPad, BasicBlock *UnwindBB, unsigned NumHandlers, InsertPosition Pos, Context &Ctx, const Twine &Name = ""); @@ -1833,10 +1834,11 @@ public: }; class ResumeInst : public SingleLLVMInstructionImpl<llvm::ResumeInst> { -public: ResumeInst(llvm::ResumeInst *CSI, Context &Ctx) : SingleLLVMInstructionImpl(ClassID::Resume, Opcode::Resume, CSI, Ctx) {} + friend class Context; // For accessing the constructor in create*() +public: static ResumeInst *create(Value *Exn, InsertPosition Pos, Context &Ctx); Value *getValue() const; unsigned getNumSuccessors() const { @@ -1848,10 
+1850,11 @@ public: }; class SwitchInst : public SingleLLVMInstructionImpl<llvm::SwitchInst> { -public: SwitchInst(llvm::SwitchInst *SI, Context &Ctx) : SingleLLVMInstructionImpl(ClassID::Switch, Opcode::Switch, SI, Ctx) {} + friend class Context; // For accessing the constructor in create*() +public: static constexpr const unsigned DefaultPseudoIndex = llvm::SwitchInst::DefaultPseudoIndex; diff --git a/llvm/include/llvm/SandboxIR/Type.h b/llvm/include/llvm/SandboxIR/Type.h index 9d1db11edb05..3218b991b31a 100644 --- a/llvm/include/llvm/SandboxIR/Type.h +++ b/llvm/include/llvm/SandboxIR/Type.h @@ -283,11 +283,8 @@ public: } #ifndef NDEBUG - void dumpOS(raw_ostream &OS) { LLVMTy->print(OS); } - LLVM_DUMP_METHOD void dump() { - dumpOS(dbgs()); - dbgs() << "\n"; - } + void dumpOS(raw_ostream &OS); + LLVM_DUMP_METHOD void dump(); #endif // NDEBUG }; diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h index 65ac576ae567..5d6d9394ef1d 100644 --- a/llvm/include/llvm/Support/AutoConvert.h +++ b/llvm/include/llvm/Support/AutoConvert.h @@ -17,6 +17,7 @@ #ifdef __MVS__ #include <_Ccsid.h> #ifdef __cplusplus +#include "llvm/Support/ErrorOr.h" #include <system_error> #endif /* __cplusplus */ @@ -54,8 +55,14 @@ std::error_code restorezOSStdHandleAutoConversion(int FD); /** \brief Set the tag information for a file descriptor. */ std::error_code setzOSFileTag(int FD, int CCSID, bool Text); -} /* namespace llvm */ -#endif /* __cplusplus */ +// Get the the tag ccsid for a file name or a file descriptor. +ErrorOr<__ccsid_t> getzOSFileTag(const char *FileName, const int FD = -1); + +// Query the file tag to determine if it needs conversion to UTF-8 codepage. 
+ErrorOr<bool> needzOSConversion(const char *FileName, const int FD = -1); + +} // namespace llvm +#endif // __cplusplus #endif /* __MVS__ */ diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index 3e2f0d9b43fc..23c54ac9bb4b 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -61,15 +61,20 @@ void setCurrentDebugTypes(const char **Types, unsigned Count); /// /// This will emit the debug information if -debug is present, and -debug-only /// is not specified, or is specified as "bitset". -#define DEBUG_WITH_TYPE(TYPE, X) \ - do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)) { X; } \ +#define DEBUG_WITH_TYPE(TYPE, ...) \ + do { \ + if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)) { \ + __VA_ARGS__; \ + } \ } while (false) #else #define isCurrentDebugType(X) (false) #define setCurrentDebugType(X) do { (void)(X); } while (false) #define setCurrentDebugTypes(X, N) do { (void)(X); (void)(N); } while (false) -#define DEBUG_WITH_TYPE(TYPE, X) do { } while (false) +#define DEBUG_WITH_TYPE(TYPE, ...) \ + do { \ + } while (false) #endif /// This boolean is set to true if the '-debug' command line option @@ -98,7 +103,7 @@ raw_ostream &dbgs(); // // LLVM_DEBUG(dbgs() << "Bitset contains: " << Bitset << "\n"); // -#define LLVM_DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X) +#define LLVM_DEBUG(...) 
DEBUG_WITH_TYPE(DEBUG_TYPE, __VA_ARGS__) } // end namespace llvm diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index d0eec2070b4d..a4a680c97a07 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -456,7 +456,7 @@ public: bool isReachableFromEntry(const NodeT *A) const { assert(!this->isPostDominator() && "This is not implemented for post dominators"); - return isReachableFromEntry(getNode(const_cast<NodeT *>(A))); + return isReachableFromEntry(getNode(A)); } bool isReachableFromEntry(const DomTreeNodeBase<NodeT> *A) const { return A; } @@ -1014,11 +1014,7 @@ bool DominatorTreeBase<NodeT, IsPostDom>::dominates(const NodeT *A, if (A == B) return true; - // Cast away the const qualifiers here. This is ok since - // this function doesn't actually return the values returned - // from getNode. - return dominates(getNode(const_cast<NodeT *>(A)), - getNode(const_cast<NodeT *>(B))); + return dominates(getNode(A), getNode(B)); } template <typename NodeT, bool IsPostDom> bool DominatorTreeBase<NodeT, IsPostDom>::properlyDominates( @@ -1026,11 +1022,7 @@ bool DominatorTreeBase<NodeT, IsPostDom>::properlyDominates( if (A == B) return false; - // Cast away the const qualifiers here. This is ok since - // this function doesn't actually return the values returned - // from getNode. 
- return dominates(getNode(const_cast<NodeT *>(A)), - getNode(const_cast<NodeT *>(B))); + return dominates(getNode(A), getNode(B)); } } // end namespace llvm diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h index c02a3cc14dc7..a587f2a8542b 100644 --- a/llvm/include/llvm/Support/Memory.h +++ b/llvm/include/llvm/Support/Memory.h @@ -15,6 +15,7 @@ #include "llvm/Support/DataTypes.h" #include <system_error> +#include <utility> namespace llvm { diff --git a/llvm/include/llvm/Support/TypeName.h b/llvm/include/llvm/Support/TypeName.h index 9547e76a7fa7..baa7a691302e 100644 --- a/llvm/include/llvm/Support/TypeName.h +++ b/llvm/include/llvm/Support/TypeName.h @@ -13,18 +13,8 @@ namespace llvm { -/// We provide a function which tries to compute the (demangled) name of a type -/// statically. -/// -/// This routine may fail on some platforms or for particularly unusual types. -/// Do not use it for anything other than logging and debugging aids. It isn't -/// portable or dependendable in any real sense. -/// -/// The returned StringRef will point into a static storage duration string. -/// However, it may not be null terminated and may be some strangely aligned -/// inner substring of a larger string. 
-template <typename DesiredTypeName> -inline StringRef getTypeName() { +namespace detail { +template <typename DesiredTypeName> inline StringRef getTypeNameImpl() { #if defined(__clang__) || defined(__GNUC__) StringRef Name = __PRETTY_FUNCTION__; @@ -38,7 +28,7 @@ inline StringRef getTypeName() { #elif defined(_MSC_VER) StringRef Name = __FUNCSIG__; - StringRef Key = "getTypeName<"; + StringRef Key = "getTypeNameImpl<"; Name = Name.substr(Name.find(Key)); assert(!Name.empty() && "Unable to find the function name!"); Name = Name.drop_front(Key.size()); @@ -58,6 +48,22 @@ inline StringRef getTypeName() { return "UNKNOWN_TYPE"; #endif } +} // namespace detail + +/// We provide a function which tries to compute the (demangled) name of a type +/// statically. +/// +/// This routine may fail on some platforms or for particularly unusual types. +/// Do not use it for anything other than logging and debugging aids. It isn't +/// portable or dependendable in any real sense. +/// +/// The returned StringRef will point into a static storage duration string. +/// However, it may not be null terminated and may be some strangely aligned +/// inner substring of a larger string. 
+template <typename DesiredTypeName> inline StringRef getTypeName() { + static StringRef Name = detail::getTypeNameImpl<DesiredTypeName>(); + return Name; +} } // namespace llvm diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index e64b78c3c1e3..81a925742578 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -860,6 +860,7 @@ public: LOG2, REPR, LISTFLATTEN, + INITIALIZED, }; private: diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index 29bdb9c1746d..d3d19c8a7dc9 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -39,7 +39,6 @@ struct CGPassBuilderOption { bool DisableLSR = false; bool DisableCGP = false; - bool PrintLSR = false; bool DisableMergeICmps = false; bool DisablePartialLibcallInlining = false; bool DisableConstantHoisting = false; diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index c8f91cd0de59..e134bab61bf6 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -99,7 +99,7 @@ def G_PHI : GenericInstruction { } def G_FRAME_INDEX : GenericInstruction { - let OutOperandList = (outs type0:$dst); + let OutOperandList = (outs ptype0:$dst); let InOperandList = (ins unknown:$src2); let hasSideEffects = false; } diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index b0c63fc7c7b8..8641eabbdd84 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -424,11 +424,22 @@ def unary_undef_to_zero: GICombineRule< [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; +def unary_undef_to_undef_frags : GICombinePatFrag< + (outs root:$dst), (ins), + !foreach(op, + [G_TRUNC, G_BITCAST, 
G_ANYEXT, G_PTRTOINT, G_INTTOPTR, G_FPTOSI, + G_FPTOUI], + (pattern (op $dst, $x), (G_IMPLICIT_DEF $x)))>; +def unary_undef_to_undef : GICombineRule< + (defs root:$dst), + (match (unary_undef_to_undef_frags $dst)), + (apply [{ Helper.replaceInstWithUndef(*${dst}.getParent()); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. def propagate_undef_any_op: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root, + (match (wip_match_opcode G_ADD, G_SUB, G_XOR):$root, [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -1361,6 +1372,12 @@ def select_to_iminmax: GICombineRule< [{ return Helper.matchSelectIMinMax(${root}, ${info}); }]), (apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>; +def simplify_neg_minmax : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SUB):$root, + [{ return Helper.matchSimplifyNegMinMax(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def match_selects : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (wip_match_opcode G_SELECT):$root, @@ -1919,6 +1936,7 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, binop_left_undef_to_zero, binop_right_undef_to_undef, unary_undef_to_zero, + unary_undef_to_undef, propagate_undef_any_op, propagate_undef_all_ops, propagate_undef_shuffle_mask, @@ -1996,7 +2014,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, and_or_disjoint_mask, fma_combines, fold_binop_into_select, sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, - combine_concat_vector, + simplify_neg_minmax, combine_concat_vector, sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines, combine_use_vector_truncate, 
merge_combines, overflow_combines]>; diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 2148f5be4c41..c8c0eeb57099 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -70,6 +70,8 @@ def : GINodeEquiv<G_SDIV, sdiv>; def : GINodeEquiv<G_UDIV, udiv>; def : GINodeEquiv<G_SREM, srem>; def : GINodeEquiv<G_UREM, urem>; +def : GINodeEquiv<G_SDIVREM, sdivrem>; +def : GINodeEquiv<G_UDIVREM, udivrem>; def : GINodeEquiv<G_AND, and>; def : GINodeEquiv<G_OR, or>; def : GINodeEquiv<G_XOR, xor>; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 0c09cfe68478..4864ba843f48 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -82,7 +82,8 @@ public: virtual void Initialize(MCContext &ctx, const TargetMachine &TM); virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, - const MCSymbol *Sym) const; + const MCSymbol *Sym, + const MachineModuleInfo *MMI) const; /// Emit the module-level metadata that the platform cares about. 
virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {} diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 7bb6c3156c43..bee0a4298c78 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -128,8 +128,8 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift SDTCisSameAs<0, 1>, SDTCisInt<2> ]>; -def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem - SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0> +def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // smullohi, umullohi, sdivrem, udivrem + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>; def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, sdivfix, etc SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> @@ -2086,4 +2086,12 @@ class ComplexPattern<ValueType ty, int numops, string fn, list<SDNode> RootNodes = roots; list<SDNodeProperty> Properties = props; int Complexity = complexity; + + // Set this to true if SelectFunc wants an additional argument + // that is the root of the matched pattern. + bit WantsRoot = false; + + // Set this to true if SelectFunc wants an additional argument + // that is the parent of the matched node. 
+ bit WantsParent = false; } diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 131132982182..ac8006d671a0 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Bitset.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/VersionTuple.h" @@ -272,9 +273,10 @@ bool isX18ReservedByDefault(const Triple &TT); unsigned getFMVPriority(ArrayRef<StringRef> Features); // For given feature names, return a bitmask corresponding to the entries of -// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks -// themselves, they are sequential (0, 1, 2, 3, ...). -uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs); +// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks themselves, +// they are sequential (0, 1, 2, 3, ...). The resulting bitmask is used at +// runtime to test whether a certain FMV feature is available on the host. 
+uint64_t getCpuSupportsMask(ArrayRef<StringRef> Features); void PrintSupportedExtensions(); diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.def b/llvm/include/llvm/TargetParser/ARMTargetParser.def index c5cd1b1bc637..6b96c3e83c8c 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParser.def +++ b/llvm/include/llvm/TargetParser/ARMTargetParser.def @@ -374,6 +374,9 @@ ARM_CPU_NAME("cortex-a78ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_RAS | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_FP16 | ARM::AEK_DOTPROD) +ARM_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false, + (ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_BF16 | + ARM::AEK_SB | ARM::AEK_I8MM)) ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, (ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_BF16 | ARM::AEK_SB | ARM::AEK_I8MM)) diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 3a1a962003ab..bd4051d00edb 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -193,7 +193,8 @@ public: Mesa, SUSE, OpenEmbedded, - LastVendorType = OpenEmbedded + Intel, + LastVendorType = Intel }; enum OSType { UnknownOS, diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h new file mode 100644 index 000000000000..344a49df5cbf --- /dev/null +++ b/llvm/include/llvm/Telemetry/Telemetry.h @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides the basic framework for Telemetry. 
+/// Refer to its documentation at llvm/docs/Telemetry.rst for more details. +//===---------------------------------------------------------------------===// + +#ifndef LLVM_TELEMETRY_TELEMETRY_H +#define LLVM_TELEMETRY_TELEMETRY_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include <map> +#include <memory> +#include <optional> +#include <string> +#include <type_traits> +#include <vector> + +namespace llvm { +namespace telemetry { + +class Serializer { +public: + virtual ~Serializer() = default; + + virtual Error init() = 0; + virtual void write(StringRef KeyName, bool Value) = 0; + virtual void write(StringRef KeyName, StringRef Value) = 0; + virtual void write(StringRef KeyName, int Value) = 0; + virtual void write(StringRef KeyName, long Value) = 0; + virtual void write(StringRef KeyName, long long Value) = 0; + virtual void write(StringRef KeyName, unsigned int Value) = 0; + virtual void write(StringRef KeyName, unsigned long Value) = 0; + virtual void write(StringRef KeyName, unsigned long long Value) = 0; + virtual void beginObject(StringRef KeyName) = 0; + virtual void endObject() = 0; + virtual Error finalize() = 0; + + template <typename T, typename = typename T::mapped_type> + void write(StringRef KeyName, const T &Map) { + static_assert(std::is_convertible_v<typename T::key_type, StringRef>, + "KeyType must be convertible to string"); + beginObject(KeyName); + for (const auto &KeyVal : Map) + write(KeyVal.first, KeyVal.second); + endObject(); + } +}; + +/// Configuration for the Manager class. +/// This stores configurations from both users and vendors and is passed +/// to the Manager upon construction. (Any changes to the config after +/// the Manager's construction will not have any effect on it). +/// +/// This struct can be extended as needed to add additional configuration +/// points specific to a vendor's implementation. 
+struct Config { + virtual ~Config() = default; + + // If true, telemetry will be enabled. + const bool EnableTelemetry; + Config(bool E) : EnableTelemetry(E) {} + + virtual std::optional<std::string> makeSessionId() { return std::nullopt; } +}; + +/// For isa, dyn_cast, etc operations on TelemetryInfo. +typedef unsigned KindType; +/// This struct is used by TelemetryInfo to support isa<>, dyn_cast<> +/// operations. +/// It is defined as a struct (rather than an enum) because it is +/// expected to be extended by subclasses which may have +/// additional TelemetryInfo types defined to describe different events. +struct EntryKind { + static const KindType Base = 0; +}; + +/// TelemetryInfo is the data courier, used to move instrumented data +/// from the tool being monitored to the Telemetry framework. +/// +/// This base class contains only the basic set of telemetry data. +/// Downstream implementations can define more subclasses with +/// additional fields to describe different events and concepts. +/// +/// For example, The LLDB debugger can define a DebugCommandInfo subclass +/// which has additional fields about the debug-command being instrumented, +/// such as `CommandArguments` or `CommandName`. +struct TelemetryInfo { + // This represents a unique-id, conventionally corresponding to + // a tool's session - i.e., every time the tool starts until it exits. + // + // Note: a tool could have multiple sessions running at once, in which + // case, these shall be multiple sets of TelemetryInfo with multiple unique + // IDs. + // + // Different usages can assign different types of IDs to this field. + std::string SessionId; + + TelemetryInfo() = default; + virtual ~TelemetryInfo() = default; + + virtual void serialize(Serializer &serializer) const; + + // For isa, dyn_cast, etc, operations. 
+ virtual KindType getKind() const { return EntryKind::Base; } + static bool classof(const TelemetryInfo *T) { + return T->getKind() == EntryKind::Base; + } +}; + +/// This class presents a data sink to which the Telemetry framework +/// sends data. +/// +/// Its implementation is transparent to the framework. +/// It is up to the vendor to decide which pieces of data to forward +/// and where to forward them. +class Destination { +public: + virtual ~Destination() = default; + virtual Error receiveEntry(const TelemetryInfo *Entry) = 0; + virtual StringLiteral name() const = 0; +}; + +/// This class is the main interaction point between any LLVM tool +/// and this framework. +/// It is responsible for collecting telemetry data from the tool being +/// monitored and transmitting the data elsewhere. +class Manager { +public: + virtual ~Manager() = default; + + // Optional callback for subclasses to perform additional tasks before + // dispatching to Destinations. + virtual Error preDispatch(TelemetryInfo *Entry) = 0; + + // Dispatch Telemetry data to the Destination(s). + // The argument is non-const because the Manager may add or remove + // data from the entry. + virtual Error dispatch(TelemetryInfo *Entry); + + // Register a Destination. 
+ void addDestination(std::unique_ptr<Destination> Destination); + +private: + std::vector<std::unique_ptr<Destination>> Destinations; +}; + +} // namespace telemetry +} // namespace llvm + +#endif // LLVM_TELEMETRY_TELEMETRY_H diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 8915969f7546..a8ee3cd531e4 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -3853,7 +3853,7 @@ struct AANoAlias /// See AbstractAttribute::isValidIRPositionForInit static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { - if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy()) + if (!IRP.getAssociatedType()->isPointerTy()) return false; return IRAttribute::isValidIRPositionForInit(A, IRP); } @@ -4220,7 +4220,7 @@ struct AADereferenceable /// See AbstractAttribute::isValidIRPositionForInit static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { - if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy()) + if (!IRP.getAssociatedType()->isPointerTy()) return false; return IRAttribute::isValidIRPositionForInit(A, IRP); } @@ -4364,7 +4364,7 @@ struct AANoCapture /// See AbstractAttribute::isValidIRPositionForInit static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { - if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy()) + if (!IRP.getAssociatedType()->isPointerTy()) return false; return IRAttribute::isValidIRPositionForInit(A, IRP); } @@ -4635,8 +4635,7 @@ struct AAMemoryBehavior /// See AbstractAttribute::isValidIRPositionForInit static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { - if (!IRP.isFunctionScope() && - !IRP.getAssociatedType()->isPtrOrPtrVectorTy()) + if (!IRP.isFunctionScope() && !IRP.getAssociatedType()->isPointerTy()) return false; return IRAttribute::isValidIRPositionForInit(A, IRP); } diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h 
b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h index b1b1ece3eff5..ee71aa64f85e 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h +++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h @@ -16,9 +16,32 @@ class Function; /// A pass to instrument code and perform run-time bounds checking on loads, /// stores, and other memory intrinsics. -struct BoundsCheckingPass : PassInfoMixin<BoundsCheckingPass> { +class BoundsCheckingPass : public PassInfoMixin<BoundsCheckingPass> { + +public: + enum class ReportingMode { + Trap, + MinRuntime, + MinRuntimeAbort, + FullRuntime, + FullRuntimeAbort, + }; + + struct BoundsCheckingOptions { + BoundsCheckingOptions(ReportingMode Mode, bool Merge); + + ReportingMode Mode; + bool Merge; + }; + + BoundsCheckingPass(BoundsCheckingOptions Options) : Options(Options) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); static bool isRequired() { return true; } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + +private: + BoundsCheckingOptions Options; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index 344c9215fb82..169f757e580d 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -66,8 +66,11 @@ namespace memprof { // Extract all calls from the IR. Arrange them in a map from caller GUIDs to a // list of call sites, each of the form {LineLocation, CalleeGUID}. 
-DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> -extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI); +DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR( + Module &M, const TargetLibraryInfo &TLI, + function_ref<bool(uint64_t)> IsPresentInProfile = [](uint64_t) { + return true; + }); struct LineLocationHash { uint64_t operator()(const LineLocation &Loc) const { diff --git a/llvm/include/llvm/Transforms/Instrumentation/PoisonChecking.h b/llvm/include/llvm/Transforms/Instrumentation/PoisonChecking.h deleted file mode 100644 index 4ec7ec809db7..000000000000 --- a/llvm/include/llvm/Transforms/Instrumentation/PoisonChecking.h +++ /dev/null @@ -1,23 +0,0 @@ -//===- PoisonChecking.h - ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_POISONCHECKING_H -#define LLVM_TRANSFORMS_INSTRUMENTATION_POISONCHECKING_H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -struct PoisonCheckingPass : public PassInfoMixin<PoisonCheckingPass> { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - -} - -#endif // LLVM_TRANSFORMS_INSTRUMENTATION_POISONCHECKING_H diff --git a/llvm/include/llvm/Transforms/Instrumentation/RealtimeSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/RealtimeSanitizer.h index f2ce1636551c..5ee1cb044393 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/RealtimeSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/RealtimeSanitizer.h @@ -23,13 +23,9 @@ namespace llvm { -struct RealtimeSanitizerOptions {}; - -class RealtimeSanitizerPass : public PassInfoMixin<RealtimeSanitizerPass> { -public: - 
RealtimeSanitizerPass(const RealtimeSanitizerOptions &Options); - PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); - +/// Create ctor and init functions. +struct RealtimeSanitizerPass : public PassInfoMixin<RealtimeSanitizerPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/TypeSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/TypeSanitizer.h new file mode 100644 index 000000000000..a6cc56df35f1 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/TypeSanitizer.h @@ -0,0 +1,38 @@ +//===- Transforms/Instrumentation/TypeSanitizer.h - TySan Pass -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the type sanitizer pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_TYPESANITIZER_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_TYPESANITIZER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Function; +class FunctionPass; +class Module; + +/// A function pass for tysan instrumentation. +struct TypeSanitizerPass : public PassInfoMixin<TypeSanitizerPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } +}; + +/// A module pass for tysan instrumentation. +/// +/// Create ctor and init functions. 
+struct ModuleTypeSanitizerPass : public PassInfoMixin<ModuleTypeSanitizerPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +} // namespace llvm +#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_TYPESANITIZER_H */ diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 049d68b8a306..2fcb64206387 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -119,8 +119,7 @@ struct ClonedCodeInfo { /// parameter. BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr); + ClonedCodeInfo *CodeInfo = nullptr); /// Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed @@ -183,6 +182,28 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's metadata into NewFunc. +/// +/// The caller is expected to populate \p VMap beforehand and set an appropriate +/// \p RemapFlag. Subprograms/CUs/types that were already mapped to themselves +/// won't be duplicated. +/// +/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a different +/// module. Use CloneFunctionInto for that behavior. +void CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc, + ValueToValueMapTy &VMap, RemapFlags RemapFlag, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + +/// Clone OldFunc's body into NewFunc. 
+void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc, + ValueToValueMapTy &VMap, RemapFlags RemapFlag, + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix = "", + ClonedCodeInfo *CodeInfo = nullptr, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, @@ -221,6 +242,14 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F, CloneFunctionChangeType Changes, DebugInfoFinder &DIFinder); +/// Build a map of debug info to use during Metadata cloning. +/// Returns true if cloning would need module level changes and false if there +/// would only be local changes. +bool BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD, + CloneFunctionChangeType Changes, + DebugInfoFinder &DIFinder, + DISubprogram *SPClonedWithinModule); + /// This class captures the data input to the InlineFunction call, and records /// the auxiliary results produced by it. 
class InlineFunctionInfo { diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h index 5d53773b5d6b..118037625421 100644 --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -125,9 +125,6 @@ private: ValueStack.back()[V] = C; } - /// Casts call result to a type of bitcast call expression - Constant *castCallResultIfNeeded(Type *ReturnType, Constant *RV); - /// Given call site return callee and list of its formal arguments Function *getCalleeWithFormalArgs(CallBase &CB, SmallVectorImpl<Constant *> &Formals); diff --git a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h index 7ea50a5584dd..e1f8065f8011 100644 --- a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h +++ b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h @@ -55,29 +55,49 @@ template <typename MarkerTy> struct ShouldRunExtraPasses { /// request additional transformations on demand. An example is extra /// simplifications after loop-vectorization, if runtime checks have been added. template <typename MarkerTy> -struct ExtraFunctionPassManager : public FunctionPassManager { +class ExtraFunctionPassManager + : public PassInfoMixin<ExtraFunctionPassManager<MarkerTy>> { + FunctionPassManager InnerFPM; + +public: + template <typename PassT> void addPass(PassT &&Pass) { + InnerFPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult<MarkerTy>(F)) - PA.intersect(FunctionPassManager::run(F, AM)); + PA.intersect(InnerFPM.run(F, AM)); PA.abandon<MarkerTy>(); return PA; } + + static bool isRequired() { return true; } }; /// A pass manager to run a set of extra loop passes if the MarkerTy analysis is /// present. This allows passes to request additional transformations on demand. 
/// An example is doing additional runs of SimpleLoopUnswitch. template <typename MarkerTy> -struct ExtraLoopPassManager : public LoopPassManager { +class ExtraLoopPassManager + : public PassInfoMixin<ExtraLoopPassManager<MarkerTy>> { + LoopPassManager InnerLPM; + +public: + template <typename PassT> void addPass(PassT &&Pass) { + InnerLPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult<MarkerTy>(L)) - PA.intersect(LoopPassManager::run(L, AM, AR, U)); + PA.intersect(InnerLPM.run(L, AM, AR, U)); PA.abandon<MarkerTy>(); return PA; } + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index c28f868039a1..19c5f7449f23 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -317,6 +317,7 @@ protected: int cmpNumbers(uint64_t L, uint64_t R) const; int cmpAligns(Align L, Align R) const; int cmpAPInts(const APInt &L, const APInt &R) const; + int cmpConstantRanges(const ConstantRange &L, const ConstantRange &R) const; int cmpAPFloats(const APFloat &L, const APFloat &R) const; int cmpMem(StringRef L, StringRef R) const; diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index bbf29e6f46b4..40c448593807 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr); +/// DO NOT CALL EXTERNALLY. 
+/// FIXME: https://github.com/llvm/llvm-project/issues/121495 +/// Once external callers of this function are removed, either inline into +/// combineMetadataForCSE, or internalize and remove KnownIDs parameter. +/// /// Combine the metadata of two instructions so that K can replace J. Some /// metadata kinds can only be kept if K does not move, meaning it dominated /// J in the original IR. diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 4b3d6fbed836..b4cd52fef70f 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -419,6 +419,12 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi); +/// Create a reduction of the given vector \p Src for a reduction of the +/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction +/// operation is described by \p Desc. +Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, + const RecurrenceDescriptor &Desc); + /// Create a generic reduction using a recurrence descriptor \p Desc /// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 73649766a953..989cf0b2d0e7 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -188,6 +188,13 @@ public: /// Return false if a sub-class wants to keep one of the loads/stores /// after the SSA construction. virtual bool shouldDelete(Instruction *I) const { return true; } + + /// Return the value to use for the point in the code that the alloca is + /// positioned. This will only be used if an Alloca is included in Insts, + /// otherwise the value of a uninitialized load will be assumed to be poison. 
+ virtual Value *getValueToUseForAlloca(Instruction *AI) const { + return nullptr; + } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index dc7e484a40a4..fbe80eddbae0 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -395,6 +395,11 @@ public: /// Returns the uncountable early exiting block. BasicBlock *getUncountableEarlyExitingBlock() const { + if (!HasUncountableEarlyExit) { + assert(getUncountableExitingBlocks().empty() && + "Expected no uncountable exiting blocks"); + return nullptr; + } assert(getUncountableExitingBlocks().size() == 1 && "Expected only a single uncountable exiting block"); return getUncountableExitingBlocks()[0]; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h index 02935d0943f7..b4e5735eb143 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -170,6 +170,15 @@ void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); +/// Same as above, but the debug message and optimization remark are identical +inline void reportVectorizationFailure(const StringRef DebugMsg, + const StringRef ORETag, + OptimizationRemarkEmitter *ORE, + Loop *TheLoop, + Instruction *I = nullptr) { + reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I); +} + /// A marker analysis to determine if extra passes should be run after loop /// vectorization. 
struct ShouldRunExtraVectorPasses diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h index 68a2daca1403..f423e1ee456c 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h @@ -220,6 +220,14 @@ class MemDGNode final : public DGNode { void setNextNode(MemDGNode *N) { NextMemN = N; } void setPrevNode(MemDGNode *N) { PrevMemN = N; } friend class DependencyGraph; // For setNextNode(), setPrevNode(). + void detachFromChain() { + if (PrevMemN != nullptr) + PrevMemN->NextMemN = NextMemN; + if (NextMemN != nullptr) + NextMemN->PrevMemN = PrevMemN; + PrevMemN = nullptr; + NextMemN = nullptr; + } public: MemDGNode(Instruction *I) : DGNode(I, DGNodeID::MemDGNode) { @@ -293,6 +301,7 @@ private: Context *Ctx = nullptr; std::optional<Context::CallbackID> CreateInstrCB; std::optional<Context::CallbackID> EraseInstrCB; + std::optional<Context::CallbackID> MoveInstrCB; std::unique_ptr<BatchAAResults> BatchAA; @@ -329,18 +338,23 @@ private: /// chain. void createNewNodes(const Interval<Instruction> &NewInterval); + /// Helper for `notify*Instr()`. \Returns the first MemDGNode that comes + /// before \p N, including or excluding \p N based on \p IncludingN, or + /// nullptr if not found. + MemDGNode *getMemDGNodeBefore(DGNode *N, bool IncludingN) const; + /// Helper for `notifyMoveInstr()`. \Returns the first MemDGNode that comes + /// after \p N, including or excluding \p N based on \p IncludingN, or nullptr + /// if not found. + MemDGNode *getMemDGNodeAfter(DGNode *N, bool IncludingN) const; + /// Called by the callbacks when a new instruction \p I has been created. - void notifyCreateInstr(Instruction *I) { - getOrCreateNode(I); - // TODO: Update the dependencies for the new node. - // TODO: Update the MemDGNode chain to include the new node if needed. 
- } - /// Called by the callbacks when instruction \p I is about to get deleted. - void notifyEraseInstr(Instruction *I) { - InstrToNodeMap.erase(I); - // TODO: Update the dependencies. - // TODO: Update the MemDGNode chain to remove the node if needed. - } + void notifyCreateInstr(Instruction *I); + /// Called by the callbacks when instruction \p I is about to get + /// deleted. + void notifyEraseInstr(Instruction *I); + /// Called by the callbacks when instruction \p I is about to be moved to + /// \p To. + void notifyMoveInstr(Instruction *I, const BBIterator &To); public: /// This constructor also registers callbacks. @@ -350,12 +364,18 @@ public: [this](Instruction *I) { notifyCreateInstr(I); }); EraseInstrCB = Ctx.registerEraseInstrCallback( [this](Instruction *I) { notifyEraseInstr(I); }); + MoveInstrCB = Ctx.registerMoveInstrCallback( + [this](Instruction *I, const BBIterator &To) { + notifyMoveInstr(I, To); + }); } ~DependencyGraph() { if (CreateInstrCB) Ctx->unregisterCreateInstrCallback(*CreateInstrCB); if (EraseInstrCB) Ctx->unregisterEraseInstrCallback(*EraseInstrCB); + if (MoveInstrCB) + Ctx->unregisterMoveInstrCallback(*MoveInstrCB); } DGNode *getNode(Instruction *I) const { diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h index e2d0b82489dd..922dd2c3a1f8 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h @@ -21,8 +21,10 @@ #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/SandboxIR/Instruction.h" #include "llvm/Support/raw_ostream.h" #include <iterator> +#include <type_traits> namespace llvm::sandboxir { @@ -207,6 +209,28 @@ public: return {NewTop, NewBottom}; } + /// Update the interval when \p I is about to be moved before \p Before. + // SFINAE disables this for non-Instructions. 
+ template <typename HelperT = T> + std::enable_if_t<std::is_same<HelperT, Instruction>::value, void> + notifyMoveInstr(HelperT *I, decltype(I->getIterator()) BeforeIt) { + assert(contains(I) && "Expect `I` in interval!"); + assert(I->getIterator() != BeforeIt && "Can't move `I` before itself!"); + + // Nothing to do if the instruction won't move. + if (std::next(I->getIterator()) == BeforeIt) + return; + + T *NewTop = Top->getIterator() == BeforeIt ? I + : I == Top ? Top->getNextNode() + : Top; + T *NewBottom = std::next(Bottom->getIterator()) == BeforeIt ? I + : I == Bottom ? Bottom->getPrevNode() + : Bottom; + Top = NewTop; + Bottom = NewBottom; + } + #ifndef NDEBUG void print(raw_ostream &OS) const { auto *Top = top(); diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap index b00da6d7cd28..6beb0e03e222 100644 --- a/llvm/include/module.modulemap +++ b/llvm/include/module.modulemap @@ -346,6 +346,7 @@ extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap" // TargetParser module before building the TargetParser module itself. module TargetParserGen { module AArch64TargetParserDef { + textual header "llvm/TargetParser/AArch64CPUFeatures.inc" header "llvm/TargetParser/AArch64TargetParser.h" extern module LLVM_Extern_TargetParser_Gen "module.extern.modulemap" export * @@ -426,3 +427,12 @@ module LLVM_WindowsManifest { umbrella "llvm/WindowsManifest" module * { export * } } + +module LLVM_SandboxIR { + requires cplusplus + + umbrella "llvm/SandboxIR" + module * { export * } + + textual header "llvm/SandboxIR/Values.def" +} |
