diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /flang/lib/Optimizer/OpenMP | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-formatusers/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'flang/lib/Optimizer/OpenMP')
4 files changed, 456 insertions, 47 deletions
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e0aebd0714c8..b85ee7e861a4 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms FIRSupport FortranSupport HLFIRDialect + FortranUtils MLIR_DEPS ${dialect_libs} diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 2b3ac169e8b5..6c7192400084 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -6,17 +6,23 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/DirectivesCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/OpenMP/Utils.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" namespace flangomp { #define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS @@ -107,6 +113,33 @@ private: using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>; +/// Collect the list of values used inside the loop but defined outside of it. +void collectLoopLiveIns(fir::DoConcurrentLoopOp loop, + llvm::SmallVectorImpl<mlir::Value> &liveIns) { + llvm::SmallDenseSet<mlir::Value> seenValues; + llvm::SmallPtrSet<mlir::Operation *, 8> seenOps; + + for (auto [lb, ub, st] : llvm::zip_equal( + loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { + liveIns.push_back(lb); + liveIns.push_back(ub); + liveIns.push_back(st); + } + + mlir::visitUsedValuesDefinedAbove( + loop.getRegion(), [&](mlir::OpOperand *operand) { + if (!seenValues.insert(operand->get()).second) + return; + + mlir::Operation *definingOp = operand->get().getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return; + + liveIns.push_back(operand->get()); + }); +} + /// Collects values that are local to a loop: "loop-local values". A loop-local /// value is one that is used exclusively inside the loop but allocated outside /// of it. This usually corresponds to temporary values that are used inside the @@ -168,22 +201,66 @@ static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoConcurrentOp> { +private: + struct TargetDeclareShapeCreationInfo { + // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to + // interface more easily `ShapeShiftOp::getOrigins()` which returns + // `std::vector`. + std::vector<mlir::Value> startIndices; + std::vector<mlir::Value> extents; + + TargetDeclareShapeCreationInfo(mlir::Value liveIn) { + mlir::Value shape = nullptr; + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) + shape = declareOp.getShape(); + + if (!shape) + return; + + auto shapeOp = + mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp()); + + if (!shapeOp && !shapeShiftOp) + TODO(liveIn.getLoc(), + "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are" + "not supported yet."); + + if (shapeShiftOp != nullptr) + startIndices = shapeShiftOp.getOrigins(); + + extents = shapeOp != nullptr + ? std::vector<mlir::Value>(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); + } + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>; + public: using mlir::OpConversionPattern<fir::DoConcurrentOp>::OpConversionPattern; DoConcurrentConversion( mlir::MLIRContext *context, bool mapToDevice, - llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip) + llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip, + mlir::SymbolTable &moduleSymbolTable) : OpConversionPattern(context), mapToDevice(mapToDevice), - concurrentLoopsToSkip(concurrentLoopsToSkip) {} + concurrentLoopsToSkip(concurrentLoopsToSkip), + moduleSymbolTable(moduleSymbolTable) {} mlir::LogicalResult matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - if (mapToDevice) - return doLoop.emitError( - "not yet implemented: Mapping `do concurrent` loops to device"); - looputils::InductionVariableInfos ivInfos; auto loop = mlir::cast<fir::DoConcurrentLoopOp>( doLoop.getRegion().back().getTerminator()); @@ -194,20 +271,72 @@ public: for (mlir::Value indVar : *indVars) ivInfos.emplace_back(loop, indVar); + llvm::SmallVector<mlir::Value> loopNestLiveIns; + looputils::collectLoopLiveIns(loop, loopNestLiveIns); + assert(!loopNestLiveIns.empty()); + llvm::SetVector<mlir::Value> locals; looputils::collectLoopLocalValues(loop, locals); + // We do not want to map "loop-local" values to the device through + // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); + + mlir::omp::TargetOp targetOp; + mlir::omp::LoopNestOperands loopNestClauseOps; + mlir::IRMapping mapper; + + if (mapToDevice) { + mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>(); + bool isTargetDevice = + llvm::cast<mlir::omp::OffloadModuleInterface>(*module) + .getIsTargetDevice(); + + mlir::omp::TargetOperands targetClauseOps; + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps, + isTargetDevice ? nullptr : &targetClauseOps); + + LiveInShapeInfoMap liveInShapeInfoMap; + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>())); + + for (mlir::Value liveIn : loopNestLiveIns) { + targetClauseOps.mapVars.push_back( + genMapInfoOpForLiveIn(builder, liveIn)); + liveInShapeInfoMap.insert( + {liveIn, TargetDeclareShapeCreationInfo(liveIn)}); + } + + targetOp = + genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); + genTeamsOp(doLoop.getLoc(), rewriter); + } + mlir::omp::ParallelOp parallelOp = genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper); - mlir::omp::LoopNestOperands loopNestClauseOps; - genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, - loopNestClauseOps); + + // Only set as composite when part of `distribute parallel do`. + parallelOp.setComposite(mapToDevice); + + if (!mapToDevice) + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps); for (mlir::Value local : locals) looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), rewriter); + if (mapToDevice) + genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + mlir::omp::LoopNestOp ompLoopNest = genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps, /*isComposite=*/mapToDevice); @@ -282,11 +411,11 @@ private: return result; } - void - genLoopNestClauseOps(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, - mlir::omp::LoopNestOperands &loopNestClauseOps) const { + void genLoopNestClauseOps( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, + mlir::omp::LoopNestOperands &loopNestClauseOps, + mlir::omp::TargetOperands *targetClauseOps = nullptr) const { assert(loopNestClauseOps.loopLowerBounds.empty() && "Loop nest bounds were already emitted!"); @@ -295,11 +424,21 @@ private: bounds.push_back(var.getDefiningOp()->getResult(0)); }; + auto hostEvalCapture = [&](mlir::Value var, + llvm::SmallVectorImpl<mlir::Value> &bounds) { + populateBounds(var, bounds); + + // Ensure that loop-nest bounds are evaluated in the host and forwarded to + // the nested omp constructs when we map to the device. + if (targetClauseOps) + targetClauseOps->hostEvalVars.push_back(var); + }; + for (auto [lb, ub, st] : llvm::zip_equal( loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { - populateBounds(lb, loopNestClauseOps.loopLowerBounds); - populateBounds(ub, loopNestClauseOps.loopUpperBounds); - populateBounds(st, loopNestClauseOps.loopSteps); + hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds); + hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds); + hostEvalCapture(st, loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); @@ -332,8 +471,8 @@ private: loop.getLocalVars(), loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(), loop.getRegionLocalArgs())) { - auto localizer = mlir::SymbolTable::lookupNearestSymbolFrom< - fir::LocalitySpecifierOp>(loop, sym); + auto localizer = moduleSymbolTable.lookup<fir::LocalitySpecifierOp>( + sym.getLeafReference()); if (localizer.getLocalitySpecifierType() == fir::LocalitySpecifierType::LocalInit) TODO(localizer.getLoc(), @@ -352,6 +491,8 @@ private: cloneFIRRegionToOMP(localizer.getDeallocRegion(), privatizer.getDeallocRegion()); + moduleSymbolTable.insert(privatizer); + wsloopClauseOps.privateVars.push_back(op); wsloopClauseOps.privateSyms.push_back( mlir::SymbolRefAttr::get(privatizer)); @@ -362,28 +503,34 @@ private: loop.getReduceVars(), loop.getReduceByrefAttr().asArrayRef(), loop.getReduceSymsAttr().getAsRange<mlir::SymbolRefAttr>(), loop.getRegionReduceArgs())) { - auto firReducer = - mlir::SymbolTable::lookupNearestSymbolFrom<fir::DeclareReductionOp>( - loop, sym); + auto firReducer = moduleSymbolTable.lookup<fir::DeclareReductionOp>( + sym.getLeafReference()); mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointAfter(firReducer); - - auto ompReducer = mlir::omp::DeclareReductionOp::create( - rewriter, firReducer.getLoc(), - sym.getLeafReference().str() + ".omp", - firReducer.getTypeAttr().getValue()); - - cloneFIRRegionToOMP(firReducer.getAllocRegion(), - ompReducer.getAllocRegion()); - cloneFIRRegionToOMP(firReducer.getInitializerRegion(), - ompReducer.getInitializerRegion()); - cloneFIRRegionToOMP(firReducer.getReductionRegion(), - ompReducer.getReductionRegion()); - cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(), - ompReducer.getAtomicReductionRegion()); - cloneFIRRegionToOMP(firReducer.getCleanupRegion(), - ompReducer.getCleanupRegion()); + std::string ompReducerName = sym.getLeafReference().str() + ".omp"; + + auto ompReducer = + moduleSymbolTable.lookup<mlir::omp::DeclareReductionOp>( + rewriter.getStringAttr(ompReducerName)); + + if (!ompReducer) { + ompReducer = mlir::omp::DeclareReductionOp::create( + rewriter, firReducer.getLoc(), ompReducerName, + firReducer.getTypeAttr().getValue()); + + cloneFIRRegionToOMP(firReducer.getAllocRegion(), + ompReducer.getAllocRegion()); + cloneFIRRegionToOMP(firReducer.getInitializerRegion(), + ompReducer.getInitializerRegion()); + cloneFIRRegionToOMP(firReducer.getReductionRegion(), + ompReducer.getReductionRegion()); + cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(), + ompReducer.getAtomicReductionRegion()); + cloneFIRRegionToOMP(firReducer.getCleanupRegion(), + ompReducer.getCleanupRegion()); + moduleSymbolTable.insert(ompReducer); + } wsloopClauseOps.reductionVars.push_back(op); wsloopClauseOps.reductionByref.push_back(byRef); @@ -429,8 +576,262 @@ private: return loopNestOp; } + void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn, + mlir::Value rawAddr, + llvm::SmallVectorImpl<mlir::Value> &boundsOps) const { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(rawAddr.getLoc(), builder, + hlfir::Entity{liveIn}, + /*contiguousHint=*/ + true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc()); + boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, + mlir::omp::MapBoundsType>( + builder, info, extVal, + /*dataExvIsAssumedSize=*/false, rawAddr.getLoc()); + } + + mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder, + mlir::Value liveIn) const { + mlir::Value rawAddr = liveIn; + llvm::StringRef name; + + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + name = declareOp.getUniqName(); + } + + if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } + + mlir::Type liveInType = liveIn.getType(); + mlir::Type eleType = liveInType; + if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType)) + eleType = refType.getElementType(); + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + captureKind = mlir::omp::VariableCaptureKind::ByCopy; + } else if (!fir::isa_builtin_cptr_type(eleType)) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + llvm::SmallVector<mlir::Value> boundsOps; + genBoundsOps(builder, liveIn, rawAddr, boundsOps); + + return Fortran::utils::openmp::createMapInfoOp( + builder, liveIn.getLoc(), rawAddr, + /*varPtrPtr=*/{}, name.str(), boundsOps, + /*members=*/{}, + /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( + mapFlag), + captureKind, rawAddr.getType()); + } + + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars, + mlir::omp::TargetOperands &clauseOps, + mlir::omp::LoopNestOperands &loopNestClauseOps, + const LiveInShapeInfoMap &liveInShapeInfoMap) const { + auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps); + auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); + + mlir::Region ®ion = targetOp.getRegion(); + + llvm::SmallVector<mlir::Type> regionArgTypes; + llvm::SmallVector<mlir::Location> regionArgLocs; + + for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars, + clauseOps.mapVars)) { + regionArgTypes.push_back(var.getType()); + regionArgLocs.push_back(var.getLoc()); + } + + rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs); + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>())); + + // Within the loop, it is possible that we discover other values that need + // to be mapped to the target region (the shape info values for arrays, for + // example). Therefore, the map block args might be extended and resized. + // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make + // sure we access the proper vector of data. + int idx = 0; + for (auto [mapInfoOp, mappedVar] : + llvm::zip_equal(clauseOps.mapVars, mappedVars)) { + auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx], + miOp, liveInShapeInfoMap.at(mappedVar)); + ++idx; + + // If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // need to "unpack" the box by getting the defining op of it's value. + // However, we did not hit this case in reality yet so leaving it as a + // todo for now. + if (mlir::isa<fir::BoxAddrOp>(mappedVar.getDefiningOp())) + TODO(mappedVar.getLoc(), + "Mapped variabled defined by `BoxAddrOp` are not supported yet"); + + auto mapHostValueToDevice = [&](mlir::Value hostValue, + mlir::Value deviceValue) { + if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType())) + mapper.map(hostValue, + builder.loadIfRef(hostValue.getLoc(), deviceValue)); + else + mapper.map(hostValue, deviceValue); + }; + + mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase()); + + if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>( + mappedVar.getDefiningOp())) + mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase()); + } + + for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), + clauseOps.hostEvalVars)) + mapper.map(hostEval, arg); + + for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { + loopNestClauseOps.loopLowerBounds[i] = + mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); + loopNestClauseOps.loopUpperBounds[i] = + mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); + loopNestClauseOps.loopSteps[i] = + mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + + // Check if cloning the bounds introduced any dependency on the outer + // region. If so, then either clone them as well if they are + // MemoryEffectFree, or else copy them to a new temporary and add them to + // the map and block_argument lists and replace their uses with the new + // temporary. + Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp); + rewriter.setInsertionPoint( + rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc())); + + return targetOp; + } + + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ? liveInMapInfoOp.getName().value().str() + : std::string(""); + if (fir::isa_ref_type(liveInType)) + liveInType = fir::unwrapRefType(liveInType); + + mlir::Value shape = [&]() -> mlir::Value { + if (!targetShapeCreationInfo.isShapedValue()) + return {}; + + llvm::SmallVector<mlir::Value> extentOperands; + llvm::SmallVector<mlir::Value> startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector<mlir::Value> shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create<fir::ShapeShiftOp>( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } + + llvm::SmallVector<mlir::Value> shapeOperands; + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands); + }(); + + return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg, + liveInName, shape); + } + + mlir::omp::TeamsOp + genTeamsOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create<mlir::omp::TeamsOp>( + loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc)); + + return teamsOp; + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto distOp = rewriter.create<mlir::omp::DistributeOp>( + loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); + return distOp; + } + bool mapToDevice; llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip; + mlir::SymbolTable &moduleSymbolTable; +}; + +/// A listener that forwards notifyOperationErased to the given callback. +struct CallbackListener : public mlir::RewriterBase::Listener { + CallbackListener(std::function<void(mlir::Operation *op)> onOperationErased) + : onOperationErased(onOperationErased) {} + + void notifyOperationErased(mlir::Operation *op) override { + onOperationErased(op); + } + + std::function<void(mlir::Operation *op)> onOperationErased; }; class DoConcurrentConversionPass @@ -444,12 +845,9 @@ public: : DoConcurrentConversionPassBase(options) {} void runOnOperation() override { - mlir::func::FuncOp func = getOperation(); - - if (func.isDeclaration()) - return; - + mlir::ModuleOp module = getOperation(); mlir::MLIRContext *context = &getContext(); + mlir::SymbolTable moduleSymbolTable(module); if (mapTo != flangomp::DoConcurrentMappingKind::DCMK_Host && mapTo != flangomp::DoConcurrentMappingKind::DCMK_Device) { @@ -460,10 +858,14 @@ public: } llvm::DenseSet<fir::DoConcurrentOp> concurrentLoopsToSkip; + CallbackListener callbackListener([&](mlir::Operation *op) { + if (auto loop = mlir::dyn_cast<fir::DoConcurrentOp>(op)) + concurrentLoopsToSkip.erase(loop); + }); mlir::RewritePatternSet patterns(context); patterns.insert<DoConcurrentConversion>( context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device, - concurrentLoopsToSkip); + concurrentLoopsToSkip, moduleSymbolTable); mlir::ConversionTarget target(*context); target.addDynamicallyLegalOp<fir::DoConcurrentOp>( [&](fir::DoConcurrentOp op) { @@ -472,8 +874,11 @@ public: target.markUnknownOpDynamicallyLegal( [](mlir::Operation *) { return true; }); - if (mlir::failed(mlir::applyFullConversion(getOperation(), target, - std::move(patterns)))) { + mlir::ConversionConfig config; + config.allowPatternRollback = false; + config.listener = &callbackListener; + if (mlir::failed(mlir::applyFullConversion(module, target, + std::move(patterns), config))) { signalPassFailure(); } } diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp index 66593ec8104f..0ff68eb01dab 100644 --- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp @@ -518,8 +518,10 @@ public: loopOp)); }); + mlir::ConversionConfig config; + config.allowPatternRollback = false; if (mlir::failed(mlir::applyFullConversion(getOperation(), target, - std::move(patterns)))) { + std::move(patterns), config))) { mlir::emitError(func.getLoc(), "error in converting `omp.loop` op"); signalPassFailure(); } diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 970f7d7ab063..30328573b74f 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -53,6 +53,7 @@ class MapsForPrivatizedSymbolsPass : public flangomp::impl::MapsForPrivatizedSymbolsPassBase< MapsForPrivatizedSymbolsPass> { + // TODO Use `createMapInfoOp` from `flang/Utils/OpenMP.h`. omp::MapInfoOp createMapInfo(Location loc, Value var, fir::FirOpBuilder &builder) { // Check if a value of type `type` can be passed to the kernel by value. |
