diff options
Diffstat (limited to 'flang/lib/Lower/OpenMP/OpenMP.cpp')
| -rw-r--r-- | flang/lib/Lower/OpenMP/OpenMP.cpp | 181 |
1 files changed, 50 insertions, 131 deletions
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ec2ec37e623f..0ec33e6b24db 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,6 +38,7 @@ #include "flang/Semantics/tools.h" #include "flang/Support/Flags.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Support/StateStack.h" @@ -47,6 +48,7 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; +using namespace Fortran::utils::openmp; //===----------------------------------------------------------------------===// // Code generation helper functions @@ -407,7 +409,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ - [&](const parser::OpenMPBlockConstruct &ompConstruct) { + [&](const parser::OmpBlockConstruct &ompConstruct) { beginClauseList = &ompConstruct.BeginDir().Clauses(); if (auto &endSpec = ompConstruct.EndDir()) endClauseList = &endSpec->Clauses(); @@ -501,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -520,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -531,7 +533,14 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, 
hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + break; + + case OMPD_teams_workdistribute: + cp.processThreadLimit(stmtCtx, hostInfo->ops); + [[fallthrough]]; + case OMPD_target_teams_workdistribute: + cp.processNumTeams(stmtCtx, hostInfo->ops); break; // Standalone 'target' case. @@ -679,7 +688,7 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter, } symThreadprivateValue = lower::genCommonBlockMember( converter, currentLocation, sym->GetUltimate(), - commonThreadprivateValue); + commonThreadprivateValue, common->size()); } else { symThreadprivateValue = genThreadprivateOp(*sym); } @@ -1392,7 +1401,7 @@ static void genIntermediateCommonBlockAccessors( for (auto obj : details->objects()) { auto targetCBMemberBind = Fortran::lower::genCommonBlockMember( - converter, currentLocation, *obj, mapArg); + converter, currentLocation, *obj, mapArg, mapSym->size()); fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj); fir::ExtendedValue targetCBExv = getExtendedValue(sexv, targetCBMemberBind); @@ -1413,7 +1422,7 @@ static void genBodyOfTargetOp( auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); mlir::Region &region = targetOp.getRegion(); - mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region); + genEntryBlock(firOpBuilder, args, region); bindEntryBlockArgs(converter, targetOp, args); if (HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter)) hostEvalInfo->bindOperands(argIface.getHostEvalBlockArgs()); @@ -1422,104 +1431,7 @@ static void genBodyOfTargetOp( // If so, then either clone them as well if they are MemoryEffectFree, or else // copy them to a new temporary and add them to the map and block_argument // lists and replace their uses with the new temporary. 
- llvm::SetVector<mlir::Value> valuesDefinedAbove; - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - while (!valuesDefinedAbove.empty()) { - for (mlir::Value val : valuesDefinedAbove) { - mlir::Operation *valOp = val.getDefiningOp(); - - // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the - // indices separately, as the alternative is to eventually map the Box, - // which comes with a fairly large overhead comparatively. We could be - // more robust about this and check using a BackwardsSlice to see if we - // run the risk of mapping a box. - if (valOp && mlir::isMemoryEffectFree(valOp) && - !mlir::isa<fir::BoxDimsOp>(valOp)) { - mlir::Operation *clonedOp = valOp->clone(); - entryBlock->push_front(clonedOp); - - auto replace = [entryBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == entryBlock; - }; - - valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace); - valOp->replaceUsesWithIf(clonedOp, replace); - } else { - auto savedIP = firOpBuilder.getInsertionPoint(); - - if (valOp) - firOpBuilder.setInsertionPointAfter(valOp); - else - // This means val is a block argument - firOpBuilder.setInsertionPoint(targetOp); - - auto copyVal = - firOpBuilder.createTemporary(val.getLoc(), val.getType()); - firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - - fir::factory::AddrAndBoundsInfo info = - fir::factory::getDataOperandBaseAddr( - firOpBuilder, val, /*isOptional=*/false, val.getLoc()); - llvm::SmallVector<mlir::Value> bounds = - fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, - mlir::omp::MapBoundsType>( - firOpBuilder, info, - hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder, - hlfir::Entity{val}) - .first, - /*dataExvIsAssumedSize=*/false, val.getLoc()); - - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - - llvm::omp::OpenMPOffloadMappingFlags mapFlag = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; - 
mlir::omp::VariableCaptureKind captureKind = - mlir::omp::VariableCaptureKind::ByRef; - - mlir::Type eleType = copyVal.getType(); - if (auto refType = - mlir::dyn_cast<fir::ReferenceType>(copyVal.getType())) - eleType = refType.getElementType(); - - if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { - captureKind = mlir::omp::VariableCaptureKind::ByCopy; - } else if (!fir::isa_builtin_cptr_type(eleType)) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - } - - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, - /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, - /*members=*/llvm::SmallVector<mlir::Value>{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - mapFlag), - captureKind, copyVal.getType()); - - // Get the index of the first non-map argument before modifying mapVars, - // then append an element to mapVars and an associated entry block - // argument at that index. - unsigned insertIndex = - argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs(); - targetOp.getMapVarsMutable().append(mapOp); - mlir::Value clonedValArg = region.insertArgument( - insertIndex, copyVal.getType(), copyVal.getLoc()); - - firOpBuilder.setInsertionPointToStart(entryBlock); - auto loadOp = fir::LoadOp::create(firOpBuilder, clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf(loadOp->getResult(0), - [entryBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == entryBlock; - }); - firOpBuilder.setInsertionPoint(entryBlock, savedIP); - } - } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - } + cloneOrMapRegionOutsiders(firOpBuilder, targetOp); // Insert dummy instruction to remember the insertion position. The // marker will be deleted since there are not uses. 
@@ -1657,9 +1569,10 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -2036,9 +1949,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector<const semantics::Symbol *>(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody<mlir::omp::LoopNestOp>( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2820,6 +2733,17 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::omp::WorkdistributeOp genWorkdistributeOp( + lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + return genOpWithBody<mlir::omp::WorkdistributeOp>( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, + llvm::omp::Directive::OMPD_workdistribute), + queue, item); +} + //===----------------------------------------------------------------------===// // Code generation functions for the standalone version of constructs that can // also be a leaf of a composite construct @@ -3237,7 +3161,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/true, symTable); - 
simdItemDSP.processStep1(&simdClauseOps); + simdItemDSP.processStep1(&simdClauseOps, simdItem->id); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. @@ -3459,7 +3383,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - // case llvm::omp::Directive::OMPD_workdistribute: + case llvm::omp::Directive::OMPD_workdistribute: + newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, + item); + break; case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); @@ -3766,7 +3693,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - const parser::OpenMPBlockConstruct &blockConstruct) { + const parser::OmpBlockConstruct &blockConstruct) { const parser::OmpDirectiveSpecification &beginSpec = blockConstruct.BeginDir(); List<Clause> clauses = makeClauses(beginSpec.Clauses(), semaCtx); @@ -3917,8 +3844,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. 
break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; @@ -3958,9 +3885,12 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List<Clause> clauses = makeClauses( std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx); const auto &endSectionsDirective = - std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t); + std::get<std::optional<parser::OmpEndSectionsDirective>>( + sectionsConstruct.t); + assert(endSectionsDirective && + "Missing end section directive should have been handled in semantics"); clauses.append(makeClauses( - std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx)); + std::get<parser::OmpClauseList>(endSectionsDirective->t), semaCtx)); mlir::Location currentLocation = converter.getCurrentLocation(); llvm::omp::Directive directive = @@ -4028,18 +3958,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if<parser::OmpClause::Collapse>(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -4060,8 +3978,9 @@ void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, firOpBuilder, currentLocation, commonValue.getType(), commonValue); converter.bindSymbol(*common, commonThreadprivateValue); // Generate the threadprivate value for the common block member. 
- symThreadprivateValue = genCommonBlockMember(converter, currentLocation, - sym, commonThreadprivateValue); + symThreadprivateValue = + genCommonBlockMember(converter, currentLocation, sym, + commonThreadprivateValue, common->size()); } else if (!var.isGlobal()) { // Non-global variable which can be in threadprivate directive must be one // variable in main program, and it has implicit SAVE attribute. Take it as @@ -4124,7 +4043,7 @@ void Fortran::lower::genDeclareTargetIntGlobal( bool Fortran::lower::isOpenMPTargetConstruct( const parser::OpenMPConstruct &omp) { llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; - if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) { + if (const auto *block = std::get_if<parser::OmpBlockConstruct>(&omp.u)) { dir = block->BeginDir().DirId(); } else if (const auto *loop = std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) { |
