diff options
Diffstat (limited to 'clang/lib/CodeGen/CGHLSLRuntime.cpp')
| -rw-r--r-- | clang/lib/CodeGen/CGHLSLRuntime.cpp | 289 |
1 files changed, 266 insertions, 23 deletions
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index ec02096787c7..208afff24d49 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -13,10 +13,11 @@ //===----------------------------------------------------------------------===// #include "CGHLSLRuntime.h" -#include "Address.h" #include "CGDebugInfo.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "HLSLBufferLayoutBuilder.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" @@ -26,6 +27,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/HLSL/RootSignatureMetadata.h" @@ -278,23 +280,18 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { // Emits constant global variables for buffer constants declarations // and creates metadata linking the constant globals with the buffer global. -void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, - llvm::GlobalVariable *BufGV) { +void CGHLSLRuntime::emitBufferGlobalsAndMetadata( + const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *BufGV, + const CGHLSLOffsetInfo &OffsetInfo) { LLVMContext &Ctx = CGM.getLLVMContext(); // get the layout struct from constant buffer target type llvm::Type *BufType = BufGV->getValueType(); - llvm::Type *BufLayoutType = - cast<llvm::TargetExtType>(BufType)->getTypeParameter(0); llvm::StructType *LayoutStruct = cast<llvm::StructType>( - cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0)); + cast<llvm::TargetExtType>(BufType)->getTypeParameter(0)); - // Start metadata list associating the buffer global variable with its - // constatns - SmallVector<llvm::Metadata *> BufGlobals; - BufGlobals.push_back(ValueAsMetadata::get(BufGV)); - - const auto *ElemIt = LayoutStruct->element_begin(); + SmallVector<std::pair<VarDecl *, uint32_t>> DeclsWithOffset; + size_t OffsetIdx = 0; for (Decl *D : BufDecl->buffer_decls()) { if (isa<CXXRecordDecl, EmptyDecl>(D)) // Nothing to do for this declaration. @@ -326,14 +323,28 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, continue; } + DeclsWithOffset.emplace_back(VD, OffsetInfo[OffsetIdx++]); + } + + if (!OffsetInfo.empty()) + llvm::stable_sort(DeclsWithOffset, [](const auto &LHS, const auto &RHS) { + return CGHLSLOffsetInfo::compareOffsets(LHS.second, RHS.second); + }); + + // Associate the buffer global variable with its constants + SmallVector<llvm::Metadata *> BufGlobals; + BufGlobals.reserve(DeclsWithOffset.size() + 1); + BufGlobals.push_back(ValueAsMetadata::get(BufGV)); + + auto ElemIt = LayoutStruct->element_begin(); + for (auto &[VD, _] : DeclsWithOffset) { + if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt)) + ++ElemIt; + assert(ElemIt != LayoutStruct->element_end() && "number of elements in layout struct does not match"); llvm::Type *LayoutType = *ElemIt++; - // FIXME: handle resources inside user defined structs - // (llvm/wg-hlsl#175) - - // create global variable for the constant and to metadata list GlobalVariable *ElemGV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(VD, LayoutType)); BufGlobals.push_back(ValueAsMetadata::get(ElemGV)); @@ -410,18 +421,17 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { // create global variable for the constant buffer CGHLSLOffsetInfo OffsetInfo = CGHLSLOffsetInfo::fromDecl(*BufDecl); - llvm::TargetExtType *TargetTy = cast<llvm::TargetExtType>( - convertHLSLSpecificType(ResHandleTy, OffsetInfo)); + llvm::Type *LayoutTy = convertHLSLSpecificType(ResHandleTy, OffsetInfo); llvm::GlobalVariable *BufGV = new GlobalVariable( - TargetTy, /*isConstant*/ false, - GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy), + LayoutTy, /*isConstant*/ false, + GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy), llvm::formatv("{0}{1}", BufDecl->getName(), BufDecl->isCBuffer() ? ".cb" : ".tb"), GlobalValue::NotThreadLocal); CGM.getModule().insertGlobalVariable(BufGV); // Add globals for constant buffer elements and create metadata nodes - emitBufferGlobalsAndMetadata(BufDecl, BufGV); + emitBufferGlobalsAndMetadata(BufDecl, BufGV, OffsetInfo); // Initialize cbuffer from binding (implicit or explicit) initializeBufferFromBinding(BufDecl, BufGV); @@ -440,7 +450,7 @@ void CGHLSLRuntime::addRootSignature( SignatureDecl->getRootElements(), nullptr, M); } -llvm::TargetExtType * +llvm::StructType * CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { const auto Entry = LayoutTypes.find(StructType); if (Entry != LayoutTypes.end()) @@ -449,7 +459,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { } void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType, - llvm::TargetExtType *LayoutTy) { + llvm::StructType *LayoutTy) { assert(getHLSLBufferLayoutType(StructType) == nullptr && "layout type for this struct already exist"); LayoutTypes[StructType] = LayoutTy; @@ -1103,3 +1113,236 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr( } return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl); } + +std::optional<LValue> CGHLSLRuntime::emitBufferArraySubscriptExpr( + const ArraySubscriptExpr *E, CodeGenFunction &CGF, + llvm::function_ref<llvm::Value *(bool Promote)> EmitIdxAfterBase) { + // Find the element type to index by first padding the element type per HLSL + // buffer rules, and then padding out to a 16-byte register boundary if + // necessary. + llvm::Type *LayoutTy = + HLSLBufferLayoutBuilder(CGF.CGM).layOutType(E->getType()); + uint64_t LayoutSizeInBits = + CGM.getDataLayout().getTypeSizeInBits(LayoutTy).getFixedValue(); + CharUnits ElementSize = CharUnits::fromQuantity(LayoutSizeInBits / 8); + CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16)); + if (RowAlignedSize > ElementSize) { + llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding( + CGM, RowAlignedSize - ElementSize); + assert(Padding && "No padding type for target?"); + LayoutTy = llvm::StructType::get(CGF.getLLVMContext(), {LayoutTy, Padding}, + /*isPacked=*/true); + } + + // If the layout type doesn't introduce any padding, we don't need to do + // anything special. + llvm::Type *OrigTy = CGF.CGM.getTypes().ConvertTypeForMem(E->getType()); + if (LayoutTy == OrigTy) + return std::nullopt; + + LValueBaseInfo EltBaseInfo; + TBAAAccessInfo EltTBAAInfo; + Address Addr = + CGF.EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); + llvm::Value *Idx = EmitIdxAfterBase(/*Promote*/ true); + + // Index into the object as-if we have an array of the padded element type, + // and then dereference the element itself to avoid reading padding that may + // be past the end of the in-memory object. + SmallVector<llvm::Value *, 2> Indices; + Indices.push_back(Idx); + Indices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + llvm::Value *GEP = CGF.Builder.CreateGEP(LayoutTy, Addr.emitRawPointer(CGF), + Indices, "cbufferidx"); + Addr = Address(GEP, Addr.getElementType(), RowAlignedSize, KnownNonNull); + + return CGF.MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); +} + +namespace { +/// Utility for emitting copies following the HLSL buffer layout rules (ie, +/// copying out of a cbuffer). +class HLSLBufferCopyEmitter { + CodeGenFunction &CGF; + Address DestPtr; + Address SrcPtr; + llvm::Type *LayoutTy = nullptr; + + SmallVector<llvm::Value *> CurStoreIndices; + SmallVector<llvm::Value *> CurLoadIndices; + + void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex, + llvm::ConstantInt *LoadIndex) { + CurStoreIndices.push_back(StoreIndex); + CurLoadIndices.push_back(LoadIndex); + auto RestoreIndices = llvm::make_scope_exit([&]() { + CurStoreIndices.pop_back(); + CurLoadIndices.pop_back(); + }); + + // First, see if this is some kind of aggregate and recurse. + if (processArray(FieldTy)) + return; + if (processBufferLayoutArray(FieldTy)) + return; + if (processStruct(FieldTy)) + return; + + // When we have a scalar or vector element we can emit the copy. + CharUnits Align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getABITypeAlign(FieldTy)); + Address SrcGEP = RawAddress( + CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(), + CurLoadIndices, "cbuf.src"), + FieldTy, Align, SrcPtr.isKnownNonNull()); + Address DestGEP = CGF.Builder.CreateInBoundsGEP( + DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest"); + llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load"); + CGF.Builder.CreateStore(Load, DestGEP); + } + + bool processArray(llvm::Type *FieldTy) { + auto *AT = dyn_cast<llvm::ArrayType>(FieldTy); + if (!AT) + return false; + + // If we have an llvm::ArrayType this is just a regular array with no top + // level padding, so all we need to do is copy each member. + for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I) + emitCopyAtIndices(AT->getElementType(), + llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.SizeTy, I)); + return true; + } + + bool processBufferLayoutArray(llvm::Type *FieldTy) { + // A buffer layout array is a struct with two elements: the padded array, + // and the last element. That is, is should look something like this: + // + // { [%n x { %type, %padding }], %type } + // + auto *ST = dyn_cast<llvm::StructType>(FieldTy); + if (!ST || ST->getNumElements() != 2) + return false; + + auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0)); + if (!PaddedEltsTy) + return false; + + auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType()); + if (!PaddedTy || PaddedTy->getNumElements() != 2) + return false; + + if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding( + PaddedTy->getElementType(1))) + return false; + + llvm::Type *ElementTy = ST->getElementType(1); + if (PaddedTy->getElementType(0) != ElementTy) + return false; + + // All but the last of the logical array elements are in the padded array. + unsigned NumElts = PaddedEltsTy->getNumElements() + 1; + + // Add an extra indirection to the load for the struct and walk the + // array prefix. + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0)); + for (unsigned I = 0; I < NumElts - 1; ++I) { + // We need to copy the element itself, without the padding. + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I)); + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + CurLoadIndices.pop_back(); + } + CurLoadIndices.pop_back(); + + // Now copy the last element. + emitCopyAtIndices(ElementTy, + llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1), + llvm::ConstantInt::get(CGF.Int32Ty, 1)); + + return true; + } + + bool processStruct(llvm::Type *FieldTy) { + auto *ST = dyn_cast<llvm::StructType>(FieldTy); + if (!ST) + return false; + + // Copy the struct field by field, but skip any explicit padding. + unsigned Skipped = 0; + for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) { + llvm::Type *ElementTy = ST->getElementType(I); + if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) + ++Skipped; + else + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.Int32Ty, I), + llvm::ConstantInt::get(CGF.Int32Ty, I + Skipped)); + } + return true; + } + +public: + HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr) + : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {} + + bool emitCopy(QualType CType) { + LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType); + + // TODO: We should be able to fall back to a regular memcpy if the layout + // type doesn't have any padding, but that runs into issues in the backend + // currently. + // + // See https://github.com/llvm/wg-hlsl/issues/351 + emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0), + llvm::ConstantInt::get(CGF.SizeTy, 0)); + return true; + } +}; +} // namespace + +bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, + Address SrcPtr, QualType CType) { + return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType); +} + +LValue CGHLSLRuntime::emitBufferMemberExpr(CodeGenFunction &CGF, + const MemberExpr *E) { + LValue Base = + CGF.EmitCheckedLValue(E->getBase(), CodeGenFunction::TCK_MemberAccess); + auto *Field = dyn_cast<FieldDecl>(E->getMemberDecl()); + assert(Field && "Unexpected access into HLSL buffer"); + + // Get the field index for the struct. + const RecordDecl *Rec = Field->getParent(); + unsigned FieldIdx = + CGM.getTypes().getCGRecordLayout(Rec).getLLVMFieldNo(Field); + + // Work out the buffer layout type to index into. + QualType RecType = CGM.getContext().getCanonicalTagType(Rec); + assert(RecType->isStructureOrClassType() && "Invalid type in HLSL buffer"); + // Since this is a member of an object in the buffer and not the buffer's + // struct/class itself, we shouldn't have any offsets on the members we need + // to contend with. + CGHLSLOffsetInfo EmptyOffsets; + llvm::StructType *LayoutTy = HLSLBufferLayoutBuilder(CGM).layOutStruct( + RecType->getAsCanonical<RecordType>(), EmptyOffsets); + + // Now index into the struct, making sure that the type we return is the + // buffer layout type rather than the original type in the AST. + QualType FieldType = Field->getType(); + llvm::Type *FieldLLVMTy = CGM.getTypes().ConvertTypeForMem(FieldType); + CharUnits Align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getABITypeAlign(FieldLLVMTy)); + Address Addr(CGF.Builder.CreateStructGEP(LayoutTy, Base.getPointer(CGF), + FieldIdx, Field->getName()), + FieldLLVMTy, Align, KnownNonNull); + + LValue LV = LValue::MakeAddr(Addr, FieldType, CGM.getContext(), + LValueBaseInfo(AlignmentSource::Type), + CGM.getTBAAAccessInfo(FieldType)); + LV.getQuals().addCVRQualifiers(Base.getVRQualifiers()); + + return LV; +} |
