summaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CGHLSLRuntime.cpp
diff options
context:
space:
mode:
authorJustin Bogner <mail@justinbogner.com>2025-11-18 13:38:43 -0800
committerGitHub <noreply@github.com>2025-11-18 13:38:43 -0800
commitc4898f3f229027e6cbdf8f9db77b8c14d70f6599 (patch)
treedddb506e5d74cd0333f0e631080077ec808f42be /clang/lib/CodeGen/CGHLSLRuntime.cpp
parent31ec633a0edaeca4d68f7f04269223a4c29396c5 (diff)
[HLSL][DirectX] Use a padding type for HLSL buffers. (#167404)
This change drops the use of the "Layout" type and instead uses explicit padding throughout the compiler to represent types in HLSL buffers. There are a few parts to this, though it's difficult to split them up as they're very interdependent: 1. Refactor HLSLBufferLayoutBuilder to allow us to calculate the padding of arbitrary types. 2. Teach Clang CodeGen to use HLSL specific paths for cbuffers when generating aggregate copies, array accesses, and structure accesses. 3. Simplify DXILCBufferAccesses such that it directly replaces accesses with dx.resource.getpointer rather than recalculating the layout. 4. Basic infrastructure for SPIR-V handling, but the implementation itself will need work in follow ups. Fixes several issues, including #138996, #144573, and #156084. Resolves #147352.
Diffstat (limited to 'clang/lib/CodeGen/CGHLSLRuntime.cpp')
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.cpp289
1 files changed, 266 insertions, 23 deletions
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index ec02096787c7..208afff24d49 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#include "CGHLSLRuntime.h"
-#include "Address.h"
#include "CGDebugInfo.h"
+#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "HLSLBufferLayoutBuilder.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attrs.inc"
@@ -26,6 +27,7 @@
#include "clang/AST/Type.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
@@ -278,23 +280,18 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() {
// Emits constant global variables for buffer constants declarations
// and creates metadata linking the constant globals with the buffer global.
-void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
- llvm::GlobalVariable *BufGV) {
+void CGHLSLRuntime::emitBufferGlobalsAndMetadata(
+ const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *BufGV,
+ const CGHLSLOffsetInfo &OffsetInfo) {
LLVMContext &Ctx = CGM.getLLVMContext();
// get the layout struct from constant buffer target type
llvm::Type *BufType = BufGV->getValueType();
- llvm::Type *BufLayoutType =
- cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
llvm::StructType *LayoutStruct = cast<llvm::StructType>(
- cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
+ cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));
- // Start metadata list associating the buffer global variable with its
- // constatns
- SmallVector<llvm::Metadata *> BufGlobals;
- BufGlobals.push_back(ValueAsMetadata::get(BufGV));
-
- const auto *ElemIt = LayoutStruct->element_begin();
+ SmallVector<std::pair<VarDecl *, uint32_t>> DeclsWithOffset;
+ size_t OffsetIdx = 0;
for (Decl *D : BufDecl->buffer_decls()) {
if (isa<CXXRecordDecl, EmptyDecl>(D))
// Nothing to do for this declaration.
@@ -326,14 +323,28 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
continue;
}
+ DeclsWithOffset.emplace_back(VD, OffsetInfo[OffsetIdx++]);
+ }
+
+ if (!OffsetInfo.empty())
+ llvm::stable_sort(DeclsWithOffset, [](const auto &LHS, const auto &RHS) {
+ return CGHLSLOffsetInfo::compareOffsets(LHS.second, RHS.second);
+ });
+
+ // Associate the buffer global variable with its constants
+ SmallVector<llvm::Metadata *> BufGlobals;
+ BufGlobals.reserve(DeclsWithOffset.size() + 1);
+ BufGlobals.push_back(ValueAsMetadata::get(BufGV));
+
+ auto ElemIt = LayoutStruct->element_begin();
+ for (auto &[VD, _] : DeclsWithOffset) {
+ if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
+ ++ElemIt;
+
assert(ElemIt != LayoutStruct->element_end() &&
"number of elements in layout struct does not match");
llvm::Type *LayoutType = *ElemIt++;
- // FIXME: handle resources inside user defined structs
- // (llvm/wg-hlsl#175)
-
- // create global variable for the constant and to metadata list
GlobalVariable *ElemGV =
cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(VD, LayoutType));
BufGlobals.push_back(ValueAsMetadata::get(ElemGV));
@@ -410,18 +421,17 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
// create global variable for the constant buffer
CGHLSLOffsetInfo OffsetInfo = CGHLSLOffsetInfo::fromDecl(*BufDecl);
- llvm::TargetExtType *TargetTy = cast<llvm::TargetExtType>(
- convertHLSLSpecificType(ResHandleTy, OffsetInfo));
+ llvm::Type *LayoutTy = convertHLSLSpecificType(ResHandleTy, OffsetInfo);
llvm::GlobalVariable *BufGV = new GlobalVariable(
- TargetTy, /*isConstant*/ false,
- GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
+ LayoutTy, /*isConstant*/ false,
+ GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
llvm::formatv("{0}{1}", BufDecl->getName(),
BufDecl->isCBuffer() ? ".cb" : ".tb"),
GlobalValue::NotThreadLocal);
CGM.getModule().insertGlobalVariable(BufGV);
// Add globals for constant buffer elements and create metadata nodes
- emitBufferGlobalsAndMetadata(BufDecl, BufGV);
+ emitBufferGlobalsAndMetadata(BufDecl, BufGV, OffsetInfo);
// Initialize cbuffer from binding (implicit or explicit)
initializeBufferFromBinding(BufDecl, BufGV);
@@ -440,7 +450,7 @@ void CGHLSLRuntime::addRootSignature(
SignatureDecl->getRootElements(), nullptr, M);
}
-llvm::TargetExtType *
+llvm::StructType *
CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
const auto Entry = LayoutTypes.find(StructType);
if (Entry != LayoutTypes.end())
@@ -449,7 +459,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
}
void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
- llvm::TargetExtType *LayoutTy) {
+ llvm::StructType *LayoutTy) {
assert(getHLSLBufferLayoutType(StructType) == nullptr &&
"layout type for this struct already exist");
LayoutTypes[StructType] = LayoutTy;
@@ -1103,3 +1113,236 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
}
return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl);
}
+
+/// Emit the lvalue for an array subscript whose elements are laid out
+/// according to the HLSL buffer rules.
+///
+/// \param E the subscript expression being emitted.
+/// \param CGF the function that receives the address computation.
+/// \param EmitIdxAfterBase callback producing the index value; it is invoked
+///        only once the base pointer has been emitted.
+/// \returns std::nullopt when the (possibly row-padded) buffer layout of the
+///          element is the very same LLVM type as its ordinary in-memory
+///          type, meaning no special handling is required; otherwise an
+///          lvalue addressing the selected element.
+std::optional<LValue> CGHLSLRuntime::emitBufferArraySubscriptExpr(
+    const ArraySubscriptExpr *E, CodeGenFunction &CGF,
+    llvm::function_ref<llvm::Value *(bool Promote)> EmitIdxAfterBase) {
+  // Start from the element type padded per the HLSL buffer rules and measure
+  // it in bytes.
+  llvm::Type *StrideTy =
+      HLSLBufferLayoutBuilder(CGF.CGM).layOutType(E->getType());
+  uint64_t EltBits =
+      CGM.getDataLayout().getTypeSizeInBits(StrideTy).getFixedValue();
+  CharUnits EltBytes = CharUnits::fromQuantity(EltBits / 8);
+
+  // Array elements are spaced a whole number of 16-byte registers apart. If
+  // the element does not already fill its last register, append explicit
+  // padding so that indexing by StrideTy steps over complete rows.
+  CharUnits StrideBytes = EltBytes.alignTo(CharUnits::fromQuantity(16));
+  if (EltBytes < StrideBytes) {
+    llvm::Type *TailPadding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+        CGM, StrideBytes - EltBytes);
+    assert(TailPadding && "No padding type for target?");
+    StrideTy = llvm::StructType::get(CGF.getLLVMContext(),
+                                     {StrideTy, TailPadding},
+                                     /*isPacked=*/true);
+  }
+
+  // Without any padding the regular array subscript path already does the
+  // right thing, so let the caller fall back to it.
+  llvm::Type *MemTy = CGF.CGM.getTypes().ConvertTypeForMem(E->getType());
+  if (StrideTy == MemTy)
+    return std::nullopt;
+
+  // Emit the base first and the index second, as the callback's name demands.
+  LValueBaseInfo BaseInfo;
+  TBAAAccessInfo TBAAInfo;
+  Address BaseAddr =
+      CGF.EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo);
+  llvm::Value *IdxVal = EmitIdxAfterBase(/*Promote*/ true);
+
+  // Treat the object as an array of the padded element type, then step into
+  // member 0 of the selected slot. Addressing the element itself (rather than
+  // the whole slot) avoids touching padding that may lie past the end of the
+  // in-memory object.
+  SmallVector<llvm::Value *, 2> GEPIndices = {
+      IdxVal, llvm::ConstantInt::get(CGF.Int32Ty, 0)};
+  llvm::Value *EltPtr = CGF.Builder.CreateGEP(
+      StrideTy, BaseAddr.emitRawPointer(CGF), GEPIndices, "cbufferidx");
+  Address EltAddr(EltPtr, BaseAddr.getElementType(), StrideBytes, KnownNonNull);
+
+  return CGF.MakeAddrLValue(EltAddr, E->getType(), BaseInfo, TBAAInfo);
+}
+
+namespace {
+/// Utility for emitting copies following the HLSL buffer layout rules (ie,
+/// copying out of a cbuffer).
+///
+/// The source is addressed through the buffer layout type (which carries
+/// explicit padding elements), while the destination is addressed through
+/// DestPtr's own type. The emitter walks the layout type recursively, keeps
+/// one GEP index list per side, and emits a load/store pair for every
+/// non-aggregate leaf it reaches.
+class HLSLBufferCopyEmitter {
+  CodeGenFunction &CGF;
+  Address DestPtr;
+  Address SrcPtr;
+  /// Buffer layout type of the object being copied; set by emitCopy().
+  llvm::Type *LayoutTy = nullptr;
+
+  /// GEP indices leading to the current position in the destination.
+  SmallVector<llvm::Value *> CurStoreIndices;
+  /// GEP indices leading to the current position in the source (LayoutTy).
+  SmallVector<llvm::Value *> CurLoadIndices;
+
+  /// Descend one level on both sides and copy whatever is found there.
+  ///
+  /// \param FieldTy layout type of the entity at the new position.
+  /// \param StoreIndex index to append on the destination side.
+  /// \param LoadIndex index to append on the source side.
+  ///
+  /// Aggregates are handed to the process* helpers, which recurse back into
+  /// this function; anything else is copied with a single load and store.
+  /// The two appended indices are removed again when the function returns.
+  void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex,
+                         llvm::ConstantInt *LoadIndex) {
+    CurStoreIndices.push_back(StoreIndex);
+    CurLoadIndices.push_back(LoadIndex);
+    auto RestoreIndices = llvm::make_scope_exit([&]() {
+      CurStoreIndices.pop_back();
+      CurLoadIndices.pop_back();
+    });
+
+    // First, see if this is some kind of aggregate and recurse. The padded
+    // array form must be tried before the generic struct handler because it
+    // is itself represented as a struct.
+    if (processArray(FieldTy))
+      return;
+    if (processBufferLayoutArray(FieldTy))
+      return;
+    if (processStruct(FieldTy))
+      return;
+
+    // When we have a scalar or vector element we can emit the copy.
+    CharUnits Align = CharUnits::fromQuantity(
+        CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
+    Address SrcGEP = RawAddress(
+        CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
+                                      CurLoadIndices, "cbuf.src"),
+        FieldTy, Align, SrcPtr.isKnownNonNull());
+    Address DestGEP = CGF.Builder.CreateInBoundsGEP(
+        DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
+    llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
+    CGF.Builder.CreateStore(Load, DestGEP);
+  }
+
+  /// Copy a plain llvm::ArrayType element by element.
+  /// \returns false (emitting nothing) if \p FieldTy is not an array type.
+  bool processArray(llvm::Type *FieldTy) {
+    auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
+    if (!AT)
+      return false;
+
+    // If we have an llvm::ArrayType this is just a regular array with no top
+    // level padding, so all we need to do is copy each member.
+    for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
+      emitCopyAtIndices(AT->getElementType(),
+                        llvm::ConstantInt::get(CGF.SizeTy, I),
+                        llvm::ConstantInt::get(CGF.SizeTy, I));
+    return true;
+  }
+
+  /// Copy an array that the buffer layout represents with per-element
+  /// padding.
+  /// \returns false (emitting nothing) if \p FieldTy does not have exactly
+  ///          the shape described below.
+  bool processBufferLayoutArray(llvm::Type *FieldTy) {
+    // A buffer layout array is a struct with two elements: the padded array,
+    // and the last element. That is, it should look something like this:
+    //
+    //   { [%n x { %type, %padding }], %type }
+    //
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST || ST->getNumElements() != 2)
+      return false;
+
+    auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+    if (!PaddedEltsTy)
+      return false;
+
+    auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
+    if (!PaddedTy || PaddedTy->getNumElements() != 2)
+      return false;
+
+    if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
+            PaddedTy->getElementType(1)))
+      return false;
+
+    llvm::Type *ElementTy = ST->getElementType(1);
+    if (PaddedTy->getElementType(0) != ElementTy)
+      return false;
+
+    // All but the last of the logical array elements are in the padded array.
+    unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
+
+    // Add an extra indirection to the load for the struct and walk the
+    // array prefix.
+    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0));
+    for (unsigned I = 0; I < NumElts - 1; ++I) {
+      // We need to copy the element itself, without the padding: on the load
+      // side select padded slot I, then member 0 of that slot.
+      CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I));
+      emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I),
+                        llvm::ConstantInt::get(CGF.Int32Ty, 0));
+      CurLoadIndices.pop_back();
+    }
+    CurLoadIndices.pop_back();
+
+    // Now copy the last element, which is member 1 of the outer struct.
+    emitCopyAtIndices(ElementTy,
+                      llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1),
+                      llvm::ConstantInt::get(CGF.Int32Ty, 1));
+
+    return true;
+  }
+
+  /// Copy a struct member by member, leaving out explicit padding elements.
+  /// \returns false (emitting nothing) if \p FieldTy is not a struct type.
+  bool processStruct(llvm::Type *FieldTy) {
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST)
+      return false;
+
+    // ST is the layout struct, so I is an index into the *source* and is the
+    // load index as-is. Padding elements are assumed to exist only on the
+    // layout side, so the matching destination member is found by discounting
+    // the padding elements that have been passed so far.
+    unsigned NumPadding = 0;
+    for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
+      llvm::Type *ElementTy = ST->getElementType(I);
+      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) {
+        ++NumPadding;
+        continue;
+      }
+      emitCopyAtIndices(ElementTy,
+                        llvm::ConstantInt::get(CGF.Int32Ty, I - NumPadding),
+                        llvm::ConstantInt::get(CGF.Int32Ty, I));
+    }
+    return true;
+  }
+
+public:
+  /// \param CGF function the copy is emitted into.
+  /// \param DestPtr address the values are stored to.
+  /// \param SrcPtr address, in buffer layout, the values are loaded from.
+  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
+      : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
+
+  /// Emit the element-wise copy of an object of type \p CType.
+  /// \returns true; the copy is currently always emitted here.
+  bool emitCopy(QualType CType) {
+    LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
+
+    // TODO: We should be able to fall back to a regular memcpy if the layout
+    // type doesn't have any padding, but that runs into issues in the backend
+    // currently.
+    //
+    // See https://github.com/llvm/wg-hlsl/issues/351
+    emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
+                      llvm::ConstantInt::get(CGF.SizeTy, 0));
+    return true;
+  }
+};
+} // namespace
+
+/// Copy an object of type \p CType from \p SrcPtr, which is laid out per the
+/// HLSL buffer rules, to \p DestPtr. The work is delegated to
+/// HLSLBufferCopyEmitter; the result of its emitCopy() is returned unchanged.
+bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr,
+                                   Address SrcPtr, QualType CType) {
+  HLSLBufferCopyEmitter Emitter(CGF, DestPtr, SrcPtr);
+  return Emitter.emitCopy(CType);
+}
+
+/// Emit the lvalue for a member access on a struct that lives in an HLSL
+/// buffer.
+///
+/// \param CGF function the address computation is emitted into.
+/// \param E the member expression; its member must be a FieldDecl.
+/// \returns an lvalue for the field, addressed through the buffer layout of
+///          the enclosing record rather than its ordinary in-memory layout.
+LValue CGHLSLRuntime::emitBufferMemberExpr(CodeGenFunction &CGF,
+                                           const MemberExpr *E) {
+  LValue Base =
+      CGF.EmitCheckedLValue(E->getBase(), CodeGenFunction::TCK_MemberAccess);
+  auto *Field = dyn_cast<FieldDecl>(E->getMemberDecl());
+  assert(Field && "Unexpected access into HLSL buffer");
+
+  // Get the field index for the struct in its ordinary record layout.
+  const RecordDecl *Rec = Field->getParent();
+  unsigned FieldIdx =
+      CGM.getTypes().getCGRecordLayout(Rec).getLLVMFieldNo(Field);
+
+  // Work out the buffer layout type to index into.
+  QualType RecType = CGM.getContext().getCanonicalTagType(Rec);
+  assert(RecType->isStructureOrClassType() && "Invalid type in HLSL buffer");
+  // Since this is a member of an object in the buffer and not the buffer's
+  // struct/class itself, we shouldn't have any offsets on the members we need
+  // to contend with.
+  CGHLSLOffsetInfo EmptyOffsets;
+  llvm::StructType *LayoutTy = HLSLBufferLayoutBuilder(CGM).layOutStruct(
+      RecType->getAsCanonical<RecordType>(), EmptyOffsets);
+
+  // The buffer layout struct may contain explicit padding elements that the
+  // ordinary record layout does not have, so FieldIdx cannot be used on it
+  // directly. Find the FieldIdx-th element that is not padding.
+  // NOTE(review): this assumes the non-padding elements of the layout struct
+  // correspond one-to-one, in order, to the record's LLVM fields - confirm
+  // against HLSLBufferLayoutBuilder.
+  unsigned NumLayoutElts = LayoutTy->getNumElements();
+  unsigned LayoutIdx = 0;
+  for (unsigned FieldsToPass = FieldIdx; LayoutIdx < NumLayoutElts;
+       ++LayoutIdx) {
+    if (CGM.getTargetCodeGenInfo().isHLSLPadding(
+            LayoutTy->getElementType(LayoutIdx)))
+      continue;
+    if (FieldsToPass == 0)
+      break;
+    --FieldsToPass;
+  }
+  assert(LayoutIdx < NumLayoutElts &&
+         "field not found in HLSL buffer layout struct");
+
+  // Now index into the struct, making sure that the type we return is the
+  // buffer layout type rather than the original type in the AST.
+  QualType FieldType = Field->getType();
+  llvm::Type *FieldLLVMTy = CGM.getTypes().ConvertTypeForMem(FieldType);
+  CharUnits Align = CharUnits::fromQuantity(
+      CGF.CGM.getDataLayout().getABITypeAlign(FieldLLVMTy));
+  Address Addr(CGF.Builder.CreateStructGEP(LayoutTy, Base.getPointer(CGF),
+                                           LayoutIdx, Field->getName()),
+               FieldLLVMTy, Align, KnownNonNull);
+
+  LValue LV = LValue::MakeAddr(Addr, FieldType, CGM.getContext(),
+                               LValueBaseInfo(AlignmentSource::Type),
+                               CGM.getTBAAAccessInfo(FieldType));
+  // Carry the base object's volatile/restrict qualifiers over to the member.
+  LV.getQuals().addCVRQualifiers(Base.getVRQualifiers());
+
+  return LV;
+}