diff options
Diffstat (limited to 'flang/lib/Optimizer/Transforms')
| -rw-r--r-- | flang/lib/Optimizer/Transforms/AddDebugInfo.cpp | 6 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp | 2 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/CUFCommon.cpp | 56 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp | 2 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/CUFOpConversion.cpp | 37 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp | 30 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp | 2 | ||||
| -rw-r--r-- | flang/lib/Optimizer/Transforms/StackArrays.cpp | 37 |
9 files changed, 78 insertions, 95 deletions
diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index 3a437c7a0f01..a8e9d198ccb9 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -121,9 +121,9 @@ void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp, // constant attribute of [hl]fir.declare/fircg.ext_declare operation that has // a dummy_scope operand). unsigned argNo = 0; - if (fir::isDummyArgument(declOp.getMemref())) { - auto arg = llvm::cast<mlir::BlockArgument>(declOp.getMemref()); - argNo = arg.getArgNumber() + 1; + if (declOp.getDummyScope()) { + if (auto arg = llvm::dyn_cast<mlir::BlockArgument>(declOp.getMemref())) + argNo = arg.getArgNumber() + 1; } auto tyAttr = typeGen.convertType(fir::unwrapRefType(declOp.getType()), diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 9eafa4ec234b..d20d3bc4108c 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -9,7 +9,6 @@ add_flang_library(FIRTransforms CompilerGeneratedNames.cpp ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp - CUFCommon.cpp CUFAddConstructor.cpp CUFDeviceGlobal.cpp CUFOpConversion.cpp diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp index 9591f48c5d41..97551595db03 100644 --- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Builder/BoxValue.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Builder/Todo.h" @@ -19,7 +20,6 @@ #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Support/DataLayout.h" -#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/registration.h" #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp deleted file mode 100644 index bbe33217e8f4..000000000000 --- a/flang/lib/Optimizer/Transforms/CUFCommon.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "flang/Optimizer/Transforms/CUFCommon.h" -#include "flang/Optimizer/Dialect/CUF/CUFOps.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" - -/// Retrieve or create the CUDA Fortran GPU module in the give in \p mod. -mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod, - mlir::SymbolTable &symTab) { - if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName)) - return gpuMod; - - auto *ctx = mod.getContext(); - mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(), - mlir::UnitAttr::get(ctx)); - - mlir::OpBuilder builder(ctx); - auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(), - cudaDeviceModuleName); - mlir::Block::iterator insertPt(mod.getBodyRegion().front().end()); - symTab.insert(gpuMod, insertPt); - return gpuMod; -} - -bool cuf::isInCUDADeviceContext(mlir::Operation *op) { - if (!op) - return false; - if (op->getParentOfType<cuf::KernelOp>() || - op->getParentOfType<mlir::gpu::GPUFuncOp>()) - return true; - if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) { - if (auto cudaProcAttr = funcOp->getAttrOfType<cuf::ProcAttributeAttr>( - cuf::getProcAttrName())) { - return cudaProcAttr.getValue() != cuf::ProcAttribute::Host; - } - } - return false; -} - -bool cuf::isRegisteredDeviceGlobal(fir::GlobalOp op) { - if (op.getConstant()) - return false; - auto attr = op.getDataAttr(); - if (attr && (*attr == cuf::DataAttribute::Device || - *attr == cuf::DataAttribute::Managed || - *attr == cuf::DataAttribute::Constant)) - return true; - return false; -} diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index 07cc1f3b4b51..2e6c272fa908 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/InternalNames.h" -#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/allocatable.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index de5c51556eec..8c525fc6daff 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -8,6 +8,7 @@ #include "flang/Optimizer/Transforms/CUFOpConversion.h" #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" @@ -15,7 +16,6 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" -#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/allocatable.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" @@ -788,6 +788,38 @@ private: const mlir::SymbolTable &symTab; }; +struct CUFSyncDescriptorOpConversion + : public mlir::OpRewritePattern<cuf::SyncDescriptorOp> { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(cuf::SyncDescriptorOp op, + mlir::PatternRewriter &rewriter) const override { + auto mod = op->getParentOfType<mlir::ModuleOp>(); + fir::FirOpBuilder builder(rewriter, mod); + mlir::Location loc = op.getLoc(); + + auto globalOp = mod.lookupSymbol<fir::GlobalOp>(op.getGlobalName()); + if (!globalOp) + return mlir::failure(); + + auto hostAddr = builder.create<fir::AddrOfOp>( + loc, fir::ReferenceType::get(globalOp.getType()), op.getGlobalName()); + mlir::func::FuncOp callee = + fir::runtime::getRuntimeFunc<mkRTKey(CUFSyncGlobalDescriptor)>(loc, + builder); + auto fTy = callee.getFunctionType(); + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); + llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( + builder, loc, fTy, hostAddr, sourceFile, sourceLine)}; + builder.create<fir::CallOp>(loc, callee, args); + op.erase(); + return mlir::success(); + } +}; + class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> { public: void runOnOperation() override { @@ -848,7 +880,8 @@ void cuf::populateCUFToFIRConversionPatterns( const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter); patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion, - CUFFreeOpConversion>(patterns.getContext()); + CUFFreeOpConversion, CUFSyncDescriptorOpConversion>( + patterns.getContext()); patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab, &dl, &converter); patterns.insert<CUFLaunchOpConversion>(patterns.getContext(), symtab); diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index cc99698ead33..8ae3d313d881 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -325,7 +325,7 @@ static bool canCacheThisType(mlir::LLVM::DICompositeTypeAttr comTy) { std::pair<std::uint64_t, unsigned short> DebugTypeGenerator::getFieldSizeAndAlign(mlir::Type fieldTy) { mlir::Type llvmTy; - if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(fieldTy)) + if (auto boxTy = mlir::dyn_cast_if_present<fir::BaseBoxType>(fieldTy)) llvmTy = llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy)); else llvmTy = llvmTypeConverter.convertType(fieldTy); @@ -371,7 +371,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( std::optional<llvm::ArrayRef<int64_t>> lowerBounds = fir::getComponentLowerBoundsIfNonDefault(Ty, fieldName, module, symbolTable); - auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(fieldTy); + auto seqTy = mlir::dyn_cast_if_present<fir::SequenceType>(fieldTy); // For members of the derived types, the information about the shift in // lower bounds is not part of the declOp but has to be extracted from the @@ -622,10 +622,10 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType( // Arrays and character need different treatment because DWARF have special // constructs for them to get the location from the descriptor. Rest of // types are handled like pointer to underlying type. - if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy)) + if (auto seqTy = mlir::dyn_cast_if_present<fir::SequenceType>(elTy)) return convertBoxedSequenceType(seqTy, fileAttr, scope, declOp, genAllocated, genAssociated); - if (auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(elTy)) + if (auto charTy = mlir::dyn_cast_if_present<fir::CharacterType>(elTy)) return convertCharacterType(charTy, fileAttr, scope, declOp, /*hasDescriptor=*/true); @@ -638,7 +638,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType( return mlir::LLVM::DIDerivedTypeAttr::get( context, llvm::dwarf::DW_TAG_pointer_type, - mlir::StringAttr::get(context, ""), elTyAttr, ptrSize, + mlir::StringAttr::get(context, ""), elTyAttr, /*sizeInBits=*/ptrSize * 8, /*alignInBits=*/0, /*offset=*/0, /*optional<address space>=*/std::nullopt, /*extra data=*/nullptr); } @@ -654,22 +654,22 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, } else if (mlir::isa<mlir::FloatType>(Ty)) { return genBasicType(context, mlir::StringAttr::get(context, "real"), Ty.getIntOrFloatBitWidth(), llvm::dwarf::DW_ATE_float); - } else if (auto logTy = mlir::dyn_cast_or_null<fir::LogicalType>(Ty)) { + } else if (auto logTy = mlir::dyn_cast_if_present<fir::LogicalType>(Ty)) { return genBasicType(context, mlir::StringAttr::get(context, logTy.getMnemonic()), kindMapping.getLogicalBitsize(logTy.getFKind()), llvm::dwarf::DW_ATE_boolean); - } else if (auto cplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(Ty)) { + } else if (auto cplxTy = mlir::dyn_cast_if_present<mlir::ComplexType>(Ty)) { auto floatTy = mlir::cast<mlir::FloatType>(cplxTy.getElementType()); unsigned bitWidth = floatTy.getWidth(); return genBasicType(context, mlir::StringAttr::get(context, "complex"), bitWidth * 2, llvm::dwarf::DW_ATE_complex_float); - } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(Ty)) { + } else if (auto seqTy = mlir::dyn_cast_if_present<fir::SequenceType>(Ty)) { return convertSequenceType(seqTy, fileAttr, scope, declOp); - } else if (auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(Ty)) { + } else if (auto charTy = mlir::dyn_cast_if_present<fir::CharacterType>(Ty)) { return convertCharacterType(charTy, fileAttr, scope, declOp, /*hasDescriptor=*/false); - } else if (auto recTy = mlir::dyn_cast_or_null<fir::RecordType>(Ty)) { + } else if (auto recTy = mlir::dyn_cast_if_present<fir::RecordType>(Ty)) { return convertRecordType(recTy, fileAttr, scope, declOp); } else if (auto tupleTy = mlir::dyn_cast_if_present<mlir::TupleType>(Ty)) { return convertTupleType(tupleTy, fileAttr, scope, declOp); @@ -678,22 +678,22 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, return convertPointerLikeType(elTy, fileAttr, scope, declOp, /*genAllocated=*/false, /*genAssociated=*/false); - } else if (auto vecTy = mlir::dyn_cast_or_null<fir::VectorType>(Ty)) { + } else if (auto vecTy = mlir::dyn_cast_if_present<fir::VectorType>(Ty)) { return convertVectorType(vecTy, fileAttr, scope, declOp); } else if (mlir::isa<mlir::IndexType>(Ty)) { return genBasicType(context, mlir::StringAttr::get(context, "integer"), llvmTypeConverter.getIndexTypeBitwidth(), llvm::dwarf::DW_ATE_signed); - } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(Ty)) { + } else if (auto boxTy = mlir::dyn_cast_if_present<fir::BaseBoxType>(Ty)) { auto elTy = boxTy.getEleTy(); - if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy)) + if (auto seqTy = mlir::dyn_cast_if_present<fir::SequenceType>(elTy)) return convertBoxedSequenceType(seqTy, fileAttr, scope, declOp, false, false); - if (auto heapTy = mlir::dyn_cast_or_null<fir::HeapType>(elTy)) + if (auto heapTy = mlir::dyn_cast_if_present<fir::HeapType>(elTy)) return convertPointerLikeType(heapTy.getElementType(), fileAttr, scope, declOp, /*genAllocated=*/true, /*genAssociated=*/false); - if (auto ptrTy = mlir::dyn_cast_or_null<fir::PointerType>(elTy)) + if (auto ptrTy = mlir::dyn_cast_if_present<fir::PointerType>(elTy)) return convertPointerLikeType(ptrTy.getElementType(), fileAttr, scope, declOp, /*genAllocated=*/false, /*genAssociated=*/true); diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp index d3567f453fce..fa6a7b23624e 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -24,6 +24,7 @@ #include "flang/Common/Fortran.h" #include "flang/Optimizer/Builder/BoxValue.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" #include "flang/Optimizer/Builder/Todo.h" @@ -31,7 +32,6 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" -#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "flang/Runtime/entry-names.h" diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index bdcb8199b790..2a9d3397e87b 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -330,6 +330,18 @@ std::optional<AllocationState> LatticePoint::get(mlir::Value val) const { return it->second; } +static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { + while (mlir::Operation *op = value.getDefiningOp()) { + if (auto declareOp = llvm::dyn_cast<fir::DeclareOp>(op)) + value = declareOp.getMemref(); + else if (auto convertOp = llvm::dyn_cast<fir::ConvertOp>(op)) + value = convertOp->getOperand(0); + else + return value; + } + return value; +} + mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Operation *op, const LatticePoint &before, LatticePoint *after) { LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op @@ -363,10 +375,10 @@ mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Value operand = op->getOperand(0); // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir - // to fir. Therefore, we only need to handle `fir::DeclareOp`s. - if (auto declareOp = - llvm::dyn_cast_if_present<fir::DeclareOp>(operand.getDefiningOp())) - operand = declareOp.getMemref(); + // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look + // past converts in case the pointer was changed between different pointer + // types. + operand = lookThroughDeclaresAndConverts(operand); std::optional<AllocationState> operandState = before.get(operand); if (operandState && *operandState == AllocationState::Allocated) { @@ -535,17 +547,12 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem, // remove freemem operations llvm::SmallVector<mlir::Operation *> erases; - for (mlir::Operation *user : allocmem.getOperation()->getUsers()) { - if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) { - for (mlir::Operation *user : declareOp->getUsers()) { - if (mlir::isa<fir::FreeMemOp>(user)) - erases.push_back(user); - } - } - - if (mlir::isa<fir::FreeMemOp>(user)) - erases.push_back(user); - } + mlir::Operation *parent = allocmem->getParentOp(); + // TODO: this shouldn't need to be re-calculated for every allocmem + parent->walk([&](fir::FreeMemOp freeOp) { + if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem) + erases.push_back(freeOp); + }); // now we are done iterating the users, it is safe to mutate them for (mlir::Operation *erase : erases) |
