diff options
Diffstat (limited to 'flang/lib/Optimizer/CodeGen/CodeGen.cpp')
| -rw-r--r-- | flang/lib/Optimizer/CodeGen/CodeGen.cpp | 293 |
1 files changed, 241 insertions, 52 deletions
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 2eeb18273509..4edea86b417c 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -23,8 +23,10 @@ #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Support/TypeCode.h" #include "flang/Optimizer/Support/Utils.h" -#include "flang/Runtime/allocator-registry.h" -#include "flang/Runtime/descriptor.h" +#include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memory.h" +#include "flang/Runtime/allocator-registry-consts.h" +#include "flang/Runtime/descriptor-consts.h" #include "flang/Semantics/runtime-type-info.h" #include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" @@ -790,7 +792,10 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { return mlir::success(); } if (mlir::isa<mlir::IntegerType>(toTy)) { - rewriter.replaceOpWithNewOp<mlir::LLVM::FPToSIOp>(convert, toTy, op0); + if (toTy.isUnsignedInteger()) + rewriter.replaceOpWithNewOp<mlir::LLVM::FPToUIOp>(convert, toTy, op0); + else + rewriter.replaceOpWithNewOp<mlir::LLVM::FPToSIOp>(convert, toTy, op0); return mlir::success(); } } else if (mlir::isa<mlir::IntegerType>(fromTy)) { @@ -803,7 +808,7 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { rewriter.replaceOpWithNewOp<mlir::LLVM::TruncOp>(convert, toTy, op0); return mlir::success(); } - if (fromFirTy == i1Type) { + if (fromFirTy == i1Type || fromFirTy.isUnsignedInteger()) { rewriter.replaceOpWithNewOp<mlir::LLVM::ZExtOp>(convert, toTy, op0); return mlir::success(); } @@ -812,7 +817,10 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> { } // Integer to floating point conversion. 
if (isFloatingPointTy(toTy)) { - rewriter.replaceOpWithNewOp<mlir::LLVM::SIToFPOp>(convert, toTy, op0); + if (fromTy.isUnsignedInteger()) + rewriter.replaceOpWithNewOp<mlir::LLVM::UIToFPOp>(convert, toTy, op0); + else + rewriter.replaceOpWithNewOp<mlir::LLVM::SIToFPOp>(convert, toTy, op0); return mlir::success(); } // Integer to pointer conversion. @@ -1134,6 +1142,93 @@ convertSubcomponentIndices(mlir::Location loc, mlir::Type eleTy, return result; } +static mlir::Value genSourceFile(mlir::Location loc, mlir::ModuleOp mod, + mlir::ConversionPatternRewriter &rewriter) { + auto ptrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(loc)) { + auto fn = flc.getFilename().str() + '\0'; + std::string globalName = fir::factory::uniqueCGIdent("cl", fn); + + if (auto g = mod.lookupSymbol<fir::GlobalOp>(globalName)) { + return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, g.getName()); + } else if (auto g = mod.lookupSymbol<mlir::LLVM::GlobalOp>(globalName)) { + return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, g.getName()); + } + + auto crtInsPt = rewriter.saveInsertionPoint(); + rewriter.setInsertionPoint(mod.getBody(), mod.getBody()->end()); + auto arrayTy = mlir::LLVM::LLVMArrayType::get( + mlir::IntegerType::get(rewriter.getContext(), 8), fn.size()); + mlir::LLVM::GlobalOp globalOp = rewriter.create<mlir::LLVM::GlobalOp>( + loc, arrayTy, /*constant=*/true, mlir::LLVM::Linkage::Linkonce, + globalName, mlir::Attribute()); + + mlir::Region &region = globalOp.getInitializerRegion(); + mlir::Block *block = rewriter.createBlock(&region); + rewriter.setInsertionPoint(block, block->begin()); + mlir::Value constValue = rewriter.create<mlir::LLVM::ConstantOp>( + loc, arrayTy, rewriter.getStringAttr(fn)); + rewriter.create<mlir::LLVM::ReturnOp>(loc, constValue); + rewriter.restoreInsertionPoint(crtInsPt); + return rewriter.create<mlir::LLVM::AddressOfOp>(loc, ptrTy, + globalOp.getName()); + } + return 
rewriter.create<mlir::LLVM::ZeroOp>(loc, ptrTy); +} + +static mlir::Value genSourceLine(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) { + if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(loc)) + return rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI32Type(), + flc.getLine()); + return rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI32Type(), 0); +} + +static mlir::Value +genCUFAllocDescriptor(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter, + mlir::ModuleOp mod, fir::BaseBoxType boxTy, + const fir::LLVMTypeConverter &typeConverter) { + std::optional<mlir::DataLayout> dl = + fir::support::getOrSetDataLayout(mod, /*allowDefaultLayout=*/true); + if (!dl) + mlir::emitError(mod.getLoc(), + "module operation must carry a data layout attribute " + "to generate llvm IR from FIR"); + + mlir::Value sourceFile = genSourceFile(loc, mod, rewriter); + mlir::Value sourceLine = genSourceLine(loc, rewriter); + + mlir::MLIRContext *ctx = mod.getContext(); + + mlir::LLVM::LLVMPointerType llvmPointerType = + mlir::LLVM::LLVMPointerType::get(ctx); + mlir::Type llvmInt32Type = mlir::IntegerType::get(ctx, 32); + mlir::Type llvmIntPtrType = + mlir::IntegerType::get(ctx, typeConverter.getPointerBitwidth(0)); + auto fctTy = mlir::LLVM::LLVMFunctionType::get( + llvmPointerType, {llvmIntPtrType, llvmPointerType, llvmInt32Type}); + + auto llvmFunc = mod.lookupSymbol<mlir::LLVM::LLVMFuncOp>( + RTNAME_STRING(CUFAllocDescriptor)); + auto funcFunc = + mod.lookupSymbol<mlir::func::FuncOp>(RTNAME_STRING(CUFAllocDescriptor)); + if (!llvmFunc && !funcFunc) + mlir::OpBuilder::atBlockEnd(mod.getBody()) + .create<mlir::LLVM::LLVMFuncOp>(loc, RTNAME_STRING(CUFAllocDescriptor), + fctTy); + + mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy); + std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8; + mlir::Value sizeInBytes = + genConstantIndex(loc, llvmIntPtrType, rewriter, boxSize); + llvm::SmallVector args = {sizeInBytes, 
sourceFile, sourceLine}; + return rewriter + .create<mlir::LLVM::CallOp>(loc, fctTy, RTNAME_STRING(CUFAllocDescriptor), + args) + .getResult(); +} + /// Common base class for embox to descriptor conversion. template <typename OP> struct EmboxCommonConversion : public fir::FIROpConversion<OP> { @@ -1246,10 +1341,10 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { /// Get the address of the type descriptor global variable that was created by /// lowering for derived type \p recType. - mlir::Value getTypeDescriptor(mlir::ModuleOp mod, - mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, - fir::RecordType recType) const { + template <typename ModOpTy> + mlir::Value + getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, fir::RecordType recType) const { std::string name = this->options.typeDescriptorsRenamedForAssembly ? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName()) @@ -1274,7 +1369,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { return rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPtrTy); } - mlir::Value populateDescriptor(mlir::Location loc, mlir::ModuleOp mod, + template <typename ModOpTy> + mlir::Value populateDescriptor(mlir::Location loc, ModOpTy mod, fir::BaseBoxType boxTy, mlir::Type inputType, mlir::ConversionPatternRewriter &rewriter, unsigned rank, mlir::Value eleSize, @@ -1322,16 +1418,12 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { insertField(rewriter, loc, descriptor, {kExtraPosInBox}, extraField); } else { // Compute the value of the extra field based on allocator_idx and - // addendum present using a Descriptor object. - Fortran::runtime::StaticDescriptor staticDescriptor; - Fortran::runtime::Descriptor &desc{staticDescriptor.descriptor()}; - desc.raw().extra = 0; - desc.SetAllocIdx(allocatorIdx); + // addendum present. 
+ unsigned extra = allocatorIdx << _CFI_ALLOCATOR_IDX_SHIFT; if (hasAddendum) - desc.SetHasAddendum(); - descriptor = - insertField(rewriter, loc, descriptor, {kExtraPosInBox}, - this->genI32Constant(loc, rewriter, desc.raw().extra)); + extra |= _CFI_ADDENDUM_FLAG; + descriptor = insertField(rewriter, loc, descriptor, {kExtraPosInBox}, + this->genI32Constant(loc, rewriter, extra)); } if (hasAddendum) { @@ -1417,10 +1509,16 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { extraField = this->getExtraFromBox(loc, sourceBoxTyPair, sourceBox, rewriter); } - auto mod = box->template getParentOfType<mlir::ModuleOp>(); - mlir::Value descriptor = - populateDescriptor(loc, mod, boxTy, inputType, rewriter, rank, eleSize, - cfiTy, typeDesc, allocatorIdx, extraField); + + mlir::Value descriptor; + if (auto gpuMod = box->template getParentOfType<mlir::gpu::GPUModuleOp>()) + descriptor = populateDescriptor(loc, gpuMod, boxTy, inputType, rewriter, + rank, eleSize, cfiTy, typeDesc, + allocatorIdx, extraField); + else if (auto mod = box->template getParentOfType<mlir::ModuleOp>()) + descriptor = populateDescriptor(loc, mod, boxTy, inputType, rewriter, + rank, eleSize, cfiTy, typeDesc, + allocatorIdx, extraField); return {boxTy, descriptor, eleSize}; } @@ -1463,11 +1561,17 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { mlir::Value extraField = this->getExtraFromBox(loc, inputBoxTyPair, loweredBox, rewriter); - auto mod = box->template getParentOfType<mlir::ModuleOp>(); - mlir::Value descriptor = - populateDescriptor(loc, mod, boxTy, box.getBox().getType(), rewriter, - rank, eleSize, cfiTy, typeDesc, - /*allocatorIdx=*/kDefaultAllocator, extraField); + mlir::Value descriptor; + if (auto gpuMod = box->template getParentOfType<mlir::gpu::GPUModuleOp>()) + descriptor = + populateDescriptor(loc, gpuMod, boxTy, box.getBox().getType(), + rewriter, rank, eleSize, cfiTy, typeDesc, + /*allocatorIdx=*/kDefaultAllocator, extraField); + else if (auto mod = 
box->template getParentOfType<mlir::ModuleOp>()) + descriptor = + populateDescriptor(loc, mod, boxTy, box.getBox().getType(), rewriter, + rank, eleSize, cfiTy, typeDesc, + /*allocatorIdx=*/kDefaultAllocator, extraField); return {boxTy, descriptor, eleSize}; } @@ -1509,12 +1613,13 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { if (gepArgs.size() != 1) fir::emitFatalError(loc, "corrupted substring GEP in fir.embox/fir.rebox"); - mlir::Type outterOffsetTy = gepArgs[0].get<mlir::Value>().getType(); + mlir::Type outterOffsetTy = + llvm::cast<mlir::Value>(gepArgs[0]).getType(); mlir::Value cast = this->integerCast(loc, rewriter, outterOffsetTy, *substringOffset); gepArgs[0] = rewriter.create<mlir::LLVM::AddOp>( - loc, outterOffsetTy, gepArgs[0].get<mlir::Value>(), cast); + loc, outterOffsetTy, llvm::cast<mlir::Value>(gepArgs[0]), cast); } } mlir::Type llvmPtrTy = ::getLlvmPtrType(resultTy.getContext()); @@ -1551,15 +1656,24 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> { mlir::Value placeInMemoryIfNotGlobalInit(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, mlir::Type boxTy, - mlir::Value boxValue) const { + mlir::Value boxValue, + bool needDeviceAllocation = false) const { if (isInGlobalOp(rewriter)) return boxValue; mlir::Type llvmBoxTy = boxValue.getType(); - auto alloca = this->genAllocaAndAddrCastWithType(loc, llvmBoxTy, - defaultAlign, rewriter); - auto storeOp = rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, alloca); + mlir::Value storage; + if (needDeviceAllocation) { + auto mod = boxValue.getDefiningOp()->getParentOfType<mlir::ModuleOp>(); + auto baseBoxTy = mlir::dyn_cast<fir::BaseBoxType>(boxTy); + storage = + genCUFAllocDescriptor(loc, rewriter, mod, baseBoxTy, this->lowerTy()); + } else { + storage = this->genAllocaAndAddrCastWithType(loc, llvmBoxTy, defaultAlign, + rewriter); + } + auto storeOp = rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, storage); this->attachTBAATag(storeOp, boxTy, 
boxTy, nullptr); - return alloca; + return storage; } }; @@ -1611,6 +1725,25 @@ struct EmboxOpConversion : public EmboxCommonConversion<fir::EmboxOp> { } }; +static bool isDeviceAllocation(mlir::Value val) { + if (auto loadOp = mlir::dyn_cast_or_null<fir::LoadOp>(val.getDefiningOp())) + return isDeviceAllocation(loadOp.getMemref()); + if (auto boxAddrOp = + mlir::dyn_cast_or_null<fir::BoxAddrOp>(val.getDefiningOp())) + return isDeviceAllocation(boxAddrOp.getVal()); + if (auto convertOp = + mlir::dyn_cast_or_null<fir::ConvertOp>(val.getDefiningOp())) + return isDeviceAllocation(convertOp.getValue()); + if (auto callOp = mlir::dyn_cast_or_null<fir::CallOp>(val.getDefiningOp())) + if (callOp.getCallee() && + (callOp.getCallee().value().getRootReference().getValue().starts_with( + RTNAME_STRING(CUFMemAlloc)) || + callOp.getCallee().value().getRootReference().getValue().starts_with( + RTNAME_STRING(CUFAllocDescriptor)))) + return true; + return false; +} + /// Create a generic box on a memory reference. 
struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> { using EmboxCommonConversion::EmboxCommonConversion; @@ -1794,9 +1927,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> { dest = insertBaseAddress(rewriter, loc, dest, base); if (fir::isDerivedTypeWithLenParams(boxTy)) TODO(loc, "fir.embox codegen of derived with length parameters"); - - mlir::Value result = - placeInMemoryIfNotGlobalInit(rewriter, loc, boxTy, dest); + mlir::Value result = placeInMemoryIfNotGlobalInit( + rewriter, loc, boxTy, dest, isDeviceAllocation(xbox.getMemref())); rewriter.replaceOp(xbox, result); return mlir::success(); } @@ -1921,7 +2053,8 @@ private: } dest = insertBaseAddress(rewriter, loc, dest, base); mlir::Value result = - placeInMemoryIfNotGlobalInit(rewriter, rebox.getLoc(), destBoxTy, dest); + placeInMemoryIfNotGlobalInit(rewriter, rebox.getLoc(), destBoxTy, dest, + isDeviceAllocation(rebox.getBox())); rewriter.replaceOp(rebox, result); return mlir::success(); } @@ -2866,10 +2999,12 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { g.setAlignment(*global.getAlignment()); auto module = global->getParentOfType<mlir::ModuleOp>(); + auto gpuMod = global->getParentOfType<mlir::gpu::GPUModuleOp>(); // Add comdat if necessary if (fir::getTargetTriple(module).supportsCOMDAT() && (linkage == mlir::LLVM::Linkage::Linkonce || - linkage == mlir::LLVM::Linkage::LinkonceODR)) { + linkage == mlir::LLVM::Linkage::LinkonceODR) && + !gpuMod) { addComdat(g, rewriter, module); } @@ -2952,7 +3087,7 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { private: static void addComdat(mlir::LLVM::GlobalOp &global, mlir::ConversionPatternRewriter &rewriter, - mlir::ModuleOp &module) { + mlir::ModuleOp module) { const char *comdatName = "__llvm_comdat"; mlir::LLVM::ComdatOp comdatOp = module.lookupSymbol<mlir::LLVM::ComdatOp>(comdatName); @@ -2990,9 +3125,23 @@ struct LoadOpConversion : public 
fir::FIROpConversion<fir::LoadOp> { // loading a fir.ref<fir.box> is implemented as taking a snapshot of the // descriptor value into a new descriptor temp. auto inputBoxStorage = adaptor.getOperands()[0]; + mlir::Value newBoxStorage; mlir::Location loc = load.getLoc(); - auto newBoxStorage = - genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter); + if (auto callOp = mlir::dyn_cast_or_null<mlir::LLVM::CallOp>( + inputBoxStorage.getDefiningOp())) { + if (callOp.getCallee() && + (*callOp.getCallee()) + .starts_with(RTNAME_STRING(CUFAllocDescriptor))) { + // CUDA Fortran local descriptor are allocated in managed memory. So + // new storage must be allocated the same way. + auto mod = load->getParentOfType<mlir::ModuleOp>(); + newBoxStorage = + genCUFAllocDescriptor(loc, rewriter, mod, boxTy, lowerTy()); + } + } + if (!newBoxStorage) + newBoxStorage = genAllocaAndAddrCastWithType(loc, llvmLoadTy, + defaultAlign, rewriter); TypePair boxTypePair{boxTy, llvmLoadTy}; mlir::Value boxSize = @@ -3149,10 +3298,40 @@ struct SelectCaseOpConversion : public fir::FIROpConversion<fir::SelectCaseOp> { } }; +/// Helper function for converting select ops. This function converts the +/// signature of the given block. If the new block signature is different from +/// `expectedTypes`, returns "failure". +static llvm::FailureOr<mlir::Block *> +getConvertedBlock(mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter, + mlir::Operation *branchOp, mlir::Block *block, + mlir::TypeRange expectedTypes) { + assert(converter && "expected non-null type converter"); + assert(!block->isEntryBlock() && "entry blocks have no predecessors"); + + // There is nothing to do if the types already match. + if (block->getArgumentTypes() == expectedTypes) + return block; + + // Compute the new block argument types and convert the block. 
+ std::optional<mlir::TypeConverter::SignatureConversion> conversion = + converter->convertBlockSignature(block); + if (!conversion) + return rewriter.notifyMatchFailure(branchOp, + "could not compute block signature"); + if (expectedTypes != conversion->getConvertedTypes()) + return rewriter.notifyMatchFailure( + branchOp, + "mismatch between adaptor operand types and computed block signature"); + return rewriter.applySignatureConversion(block, *conversion, converter); +} + template <typename OP> -static void selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, - OP select, typename OP::Adaptor adaptor, - mlir::ConversionPatternRewriter &rewriter) { +static llvm::LogicalResult +selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, OP select, + typename OP::Adaptor adaptor, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { unsigned conds = select.getNumConditions(); auto cases = select.getCases().getValue(); mlir::Value selector = adaptor.getSelector(); @@ -3170,15 +3349,24 @@ static void selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, auto destOps = select.getSuccessorOperands(adaptor.getOperands(), t); const mlir::Attribute &attr = cases[t]; if (auto intAttr = mlir::dyn_cast<mlir::IntegerAttr>(attr)) { - destinations.push_back(dest); destinationsOperands.push_back(destOps ? *destOps : mlir::ValueRange{}); + auto convertedBlock = + getConvertedBlock(rewriter, converter, select, dest, + mlir::TypeRange(destinationsOperands.back())); + if (mlir::failed(convertedBlock)) + return mlir::failure(); + destinations.push_back(*convertedBlock); caseValues.push_back(intAttr.getInt()); continue; } assert(mlir::dyn_cast_or_null<mlir::UnitAttr>(attr)); assert((t + 1 == conds) && "unit must be last"); - defaultDestination = dest; defaultOperands = destOps ? 
*destOps : mlir::ValueRange{}; + auto convertedBlock = getConvertedBlock(rewriter, converter, select, dest, + mlir::TypeRange(defaultOperands)); + if (mlir::failed(convertedBlock)) + return mlir::failure(); + defaultDestination = *convertedBlock; } // LLVM::SwitchOp takes a i32 type for the selector. @@ -3194,6 +3382,7 @@ static void selectMatchAndRewrite(const fir::LLVMTypeConverter &lowering, /*caseDestinations=*/destinations, /*caseOperands=*/destinationsOperands, /*branchWeights=*/llvm::ArrayRef<std::int32_t>()); + return mlir::success(); } /// conversion of fir::SelectOp to an if-then-else ladder @@ -3203,8 +3392,8 @@ struct SelectOpConversion : public fir::FIROpConversion<fir::SelectOp> { llvm::LogicalResult matchAndRewrite(fir::SelectOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - selectMatchAndRewrite<fir::SelectOp>(lowerTy(), op, adaptor, rewriter); - return mlir::success(); + return selectMatchAndRewrite<fir::SelectOp>(lowerTy(), op, adaptor, + rewriter, getTypeConverter()); } }; @@ -3215,8 +3404,8 @@ struct SelectRankOpConversion : public fir::FIROpConversion<fir::SelectRankOp> { llvm::LogicalResult matchAndRewrite(fir::SelectRankOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - selectMatchAndRewrite<fir::SelectRankOp>(lowerTy(), op, adaptor, rewriter); - return mlir::success(); + return selectMatchAndRewrite<fir::SelectRankOp>( + lowerTy(), op, adaptor, rewriter, getTypeConverter()); } }; |
