summary | refs | log | tree | commit | diff
path: root/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp')
-rw-r--r-- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp 122
1 files changed, 76 insertions, 46 deletions
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 36e10372e4bc..6053e34f30a4 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -74,7 +74,9 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
attr.getName() ==
gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
- attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName())
+ attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
+ attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
+ attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
continue;
if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
argAttrs = gpuFuncOp.getArgAttrsAttr();
@@ -82,27 +84,28 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
}
attributes.push_back(attr);
}
+
+ DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
+ DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
+ // Ensure we don't lose information if the function is lowered before its
+ // surrounding context.
+ auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
+ if (knownBlockSize)
+ attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
+ knownBlockSize);
+ if (knownGridSize)
+ attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
+ knownGridSize);
+
// Add a dialect specific kernel attribute in addition to GPU kernel
// attribute. The former is necessary for further translation while the
// latter is expected by gpu.launch_func.
if (gpuFuncOp.isKernel()) {
attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
-
- // Set the block size attribute if it is present.
- if (kernelBlockSizeAttributeName.has_value()) {
- std::optional<int32_t> dimX =
- gpuFuncOp.getKnownBlockSize(gpu::Dimension::x);
- std::optional<int32_t> dimY =
- gpuFuncOp.getKnownBlockSize(gpu::Dimension::y);
- std::optional<int32_t> dimZ =
- gpuFuncOp.getKnownBlockSize(gpu::Dimension::z);
- if (dimX.has_value() || dimY.has_value() || dimZ.has_value()) {
- // If any of the dimensions are missing, fill them in with 1.
- attributes.emplace_back(
- kernelBlockSizeAttributeName.value(),
- rewriter.getDenseI32ArrayAttr(
- {dimX.value_or(1), dimY.value_or(1), dimZ.value_or(1)}));
- }
+ // Set the dialect-specific block size attribute if there is one.
+ if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) {
+ attributes.emplace_back(kernelBlockSizeAttributeName.value(),
+ knownBlockSize);
}
}
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
@@ -179,35 +182,6 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
&signatureConversion)))
return failure();
- // If bare memref pointers are being used, remap them back to memref
- // descriptors This must be done after signature conversion to get rid of the
- // unrealized casts.
- if (getTypeConverter()->getOptions().useBarePtrCallConv) {
- OpBuilder::InsertionGuard guard(rewriter);
- rewriter.setInsertionPointToStart(&llvmFuncOp.getBody().front());
- for (const auto [idx, argTy] :
- llvm::enumerate(gpuFuncOp.getArgumentTypes())) {
- auto memrefTy = dyn_cast<MemRefType>(argTy);
- if (!memrefTy)
- continue;
- assert(memrefTy.hasStaticShape() &&
- "Bare pointer convertion used with dynamically-shaped memrefs");
- // Use a placeholder when replacing uses of the memref argument to prevent
- // circular replacements.
- auto remapping = signatureConversion.getInputMapping(idx);
- assert(remapping && remapping->size == 1 &&
- "Type converter should produce 1-to-1 mapping for bare memrefs");
- BlockArgument newArg =
- llvmFuncOp.getBody().getArgument(remapping->inputNo);
- auto placeholder = rewriter.create<LLVM::UndefOp>(
- loc, getTypeConverter()->convertType(memrefTy));
- rewriter.replaceUsesOfBlockArgument(newArg, placeholder);
- Value desc = MemRefDescriptor::fromStaticShape(
- rewriter, loc, *getTypeConverter(), memrefTy, newArg);
- rewriter.replaceOp(placeholder, {desc});
- }
- }
-
// Get memref type from function arguments and set the noalias to
// pointer arguments.
for (const auto [idx, argTy] :
@@ -681,6 +655,62 @@ LogicalResult GPUDynamicSharedMemoryOpLowering::matchAndRewrite(
return success();
}
+LogicalResult GPUReturnOpLowering::matchAndRewrite(
+ gpu::ReturnOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const {
+ Location loc = op.getLoc();
+ unsigned numArguments = op.getNumOperands();
+ SmallVector<Value, 4> updatedOperands;
+
+ bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
+ if (useBarePtrCallConv) {
+ // Bare-ptr calling convention: a convertible memref operand is returned as
+ // its descriptor's *allocated* pointer. NOTE(review): confirm not aligned.
+ for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
+ Type oldTy = std::get<0>(it).getType();
+ Value newOperand = std::get<1>(it);
+ if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
+ cast<BaseMemRefType>(oldTy))) {
+ MemRefDescriptor memrefDesc(newOperand);
+ newOperand = memrefDesc.allocatedPtr(rewriter, loc);
+ } else if (isa<UnrankedMemRefType>(oldTy)) {
+ // Unranked memrefs are not supported in the bare pointer calling
+ // convention, so the pattern reports failure for them.
+ return failure();
+ }
+ updatedOperands.push_back(newOperand);
+ }
+ } else {
+ updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
+ (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
+ updatedOperands,
+ /*toDynamic=*/true);
+ }
+
+ // With 0 or 1 operands no packing is needed: emit LLVM::ReturnOp and finish.
+ if (numArguments <= 1) {
+ rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
+ op, TypeRange(), updatedOperands, op->getAttrs());
+ return success();
+ }
+
+ // Otherwise, pack the operands into one LLVM struct value (an undef filled
+ // in by successive insertvalue ops) and return that single value.
+ auto packedType = getTypeConverter()->packFunctionResults(
+ op.getOperandTypes(), useBarePtrCallConv);
+ if (!packedType) {
+ return rewriter.notifyMatchFailure(op, "could not convert result types");
+ }
+
+ Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType);
+ for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
+ packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
+ }
+ rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
+ op->getAttrs());
+ return success();
+}
+
void mlir::populateGpuMemorySpaceAttributeConversions(
TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
typeConverter.addTypeAttributeConversion(