summaryrefslogtreecommitdiff
path: root/flang/lib/Lower/OpenMP/OpenMP.cpp
diff options
context:
space:
mode:
authorMingming Liu <mingmingl@google.com>2025-09-10 15:25:31 -0700
committerGitHub <noreply@github.com>2025-09-10 15:25:31 -0700
commit1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree57f4b1f313c8cf74eed8819870f39c36ea263c68 /flang/lib/Lower/OpenMP/OpenMP.cpp
parent898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parentb8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-formatusers/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'flang/lib/Lower/OpenMP/OpenMP.cpp')
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp181
1 files changed, 50 insertions, 131 deletions
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ec2ec37e623f..0ec33e6b24db 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -38,6 +38,7 @@
#include "flang/Semantics/tools.h"
#include "flang/Support/Flags.h"
#include "flang/Support/OpenMP-utils.h"
+#include "flang/Utils/OpenMP.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Support/StateStack.h"
@@ -47,6 +48,7 @@
using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;
+using namespace Fortran::utils::openmp;
//===----------------------------------------------------------------------===//
// Code generation helper functions
@@ -407,7 +409,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
const parser::OmpClauseList *endClauseList = nullptr;
common::visit(
common::visitors{
- [&](const parser::OpenMPBlockConstruct &ompConstruct) {
+ [&](const parser::OmpBlockConstruct &ompConstruct) {
beginClauseList = &ompConstruct.BeginDir().Clauses();
if (auto &endSpec = ompConstruct.EndDir())
endClauseList = &endSpec->Clauses();
@@ -501,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_distribute:
case OMPD_distribute_simd:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
break;
case OMPD_teams:
@@ -520,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
[[fallthrough]];
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
cp.processNumTeams(stmtCtx, hostInfo->ops);
break;
@@ -531,7 +533,14 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
cp.processNumTeams(stmtCtx, hostInfo->ops);
[[fallthrough]];
case OMPD_loop:
- cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
+ cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv);
+ break;
+
+ case OMPD_teams_workdistribute:
+ cp.processThreadLimit(stmtCtx, hostInfo->ops);
+ [[fallthrough]];
+ case OMPD_target_teams_workdistribute:
+ cp.processNumTeams(stmtCtx, hostInfo->ops);
break;
// Standalone 'target' case.
@@ -679,7 +688,7 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter,
}
symThreadprivateValue = lower::genCommonBlockMember(
converter, currentLocation, sym->GetUltimate(),
- commonThreadprivateValue);
+ commonThreadprivateValue, common->size());
} else {
symThreadprivateValue = genThreadprivateOp(*sym);
}
@@ -1392,7 +1401,7 @@ static void genIntermediateCommonBlockAccessors(
for (auto obj : details->objects()) {
auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
- converter, currentLocation, *obj, mapArg);
+ converter, currentLocation, *obj, mapArg, mapSym->size());
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
fir::ExtendedValue targetCBExv =
getExtendedValue(sexv, targetCBMemberBind);
@@ -1413,7 +1422,7 @@ static void genBodyOfTargetOp(
auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
mlir::Region &region = targetOp.getRegion();
- mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
+ genEntryBlock(firOpBuilder, args, region);
bindEntryBlockArgs(converter, targetOp, args);
if (HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter))
hostEvalInfo->bindOperands(argIface.getHostEvalBlockArgs());
@@ -1422,104 +1431,7 @@ static void genBodyOfTargetOp(
// If so, then either clone them as well if they are MemoryEffectFree, or else
// copy them to a new temporary and add them to the map and block_argument
// lists and replace their uses with the new temporary.
- llvm::SetVector<mlir::Value> valuesDefinedAbove;
- mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
- while (!valuesDefinedAbove.empty()) {
- for (mlir::Value val : valuesDefinedAbove) {
- mlir::Operation *valOp = val.getDefiningOp();
-
- // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the
- // indices separately, as the alternative is to eventually map the Box,
- // which comes with a fairly large overhead comparatively. We could be
- // more robust about this and check using a BackwardsSlice to see if we
- // run the risk of mapping a box.
- if (valOp && mlir::isMemoryEffectFree(valOp) &&
- !mlir::isa<fir::BoxDimsOp>(valOp)) {
- mlir::Operation *clonedOp = valOp->clone();
- entryBlock->push_front(clonedOp);
-
- auto replace = [entryBlock](mlir::OpOperand &use) {
- return use.getOwner()->getBlock() == entryBlock;
- };
-
- valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
- valOp->replaceUsesWithIf(clonedOp, replace);
- } else {
- auto savedIP = firOpBuilder.getInsertionPoint();
-
- if (valOp)
- firOpBuilder.setInsertionPointAfter(valOp);
- else
- // This means val is a block argument
- firOpBuilder.setInsertionPoint(targetOp);
-
- auto copyVal =
- firOpBuilder.createTemporary(val.getLoc(), val.getType());
- firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
-
- fir::factory::AddrAndBoundsInfo info =
- fir::factory::getDataOperandBaseAddr(
- firOpBuilder, val, /*isOptional=*/false, val.getLoc());
- llvm::SmallVector<mlir::Value> bounds =
- fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
- mlir::omp::MapBoundsType>(
- firOpBuilder, info,
- hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
- hlfir::Entity{val})
- .first,
- /*dataExvIsAssumedSize=*/false, val.getLoc());
-
- std::stringstream name;
- firOpBuilder.setInsertionPoint(targetOp);
-
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
- mlir::omp::VariableCaptureKind captureKind =
- mlir::omp::VariableCaptureKind::ByRef;
-
- mlir::Type eleType = copyVal.getType();
- if (auto refType =
- mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
- eleType = refType.getElementType();
-
- if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
- captureKind = mlir::omp::VariableCaptureKind::ByCopy;
- } else if (!fir::isa_builtin_cptr_type(eleType)) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- }
-
- mlir::Value mapOp = createMapInfoOp(
- firOpBuilder, copyVal.getLoc(), copyVal,
- /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
- /*members=*/llvm::SmallVector<mlir::Value>{},
- /*membersIndex=*/mlir::ArrayAttr{},
- static_cast<
- std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- mapFlag),
- captureKind, copyVal.getType());
-
- // Get the index of the first non-map argument before modifying mapVars,
- // then append an element to mapVars and an associated entry block
- // argument at that index.
- unsigned insertIndex =
- argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
- targetOp.getMapVarsMutable().append(mapOp);
- mlir::Value clonedValArg = region.insertArgument(
- insertIndex, copyVal.getType(), copyVal.getLoc());
-
- firOpBuilder.setInsertionPointToStart(entryBlock);
- auto loadOp = fir::LoadOp::create(firOpBuilder, clonedValArg.getLoc(),
- clonedValArg);
- val.replaceUsesWithIf(loadOp->getResult(0),
- [entryBlock](mlir::OpOperand &use) {
- return use.getOwner()->getBlock() == entryBlock;
- });
- firOpBuilder.setInsertionPoint(entryBlock, savedIP);
- }
- }
- valuesDefinedAbove.clear();
- mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
- }
+ cloneOrMapRegionOutsiders(firOpBuilder, targetOp);
// Insert dummy instruction to remember the insertion position. The
// marker will be deleted since there are not uses.
@@ -1657,9 +1569,10 @@ genLoopNestClauses(lower::AbstractConverter &converter,
HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter);
if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv))
- cp.processCollapse(loc, eval, clauseOps, iv);
+ cp.processCollapse(loc, eval, clauseOps, clauseOps, iv);
clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
+ cp.processTileSizes(eval, clauseOps);
}
static void genLoopClauses(
@@ -2036,9 +1949,9 @@ static mlir::omp::LoopNestOp genLoopNestOp(
return llvm::SmallVector<const semantics::Symbol *>(iv);
};
- auto *nestedEval =
- getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
-
+ uint64_t nestValue = getCollapseValue(item->clauses);
+ nestValue = nestValue < iv.size() ? iv.size() : nestValue;
+ auto *nestedEval = getCollapsedLoopEval(eval, nestValue);
return genOpWithBody<mlir::omp::LoopNestOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
directive)
@@ -2820,6 +2733,17 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
queue, item, clauseOps);
}
+static mlir::omp::WorkdistributeOp genWorkdistributeOp(
+ lower::AbstractConverter &converter, lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
+ return genOpWithBody<mlir::omp::WorkdistributeOp>(
+ OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+ llvm::omp::Directive::OMPD_workdistribute),
+ queue, item);
+}
+
//===----------------------------------------------------------------------===//
// Code generation functions for the standalone version of constructs that can
// also be a leaf of a composite construct
@@ -3237,7 +3161,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd(
DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
- simdItemDSP.processStep1(&simdClauseOps);
+ simdItemDSP.processStep1(&simdClauseOps, simdItem->id);
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
@@ -3459,7 +3383,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
break;
- // case llvm::omp::Directive::OMPD_workdistribute:
+ case llvm::omp::Directive::OMPD_workdistribute:
+ newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue,
+ item);
+ break;
case llvm::omp::Directive::OMPD_workshare:
newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
@@ -3766,7 +3693,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
- const parser::OpenMPBlockConstruct &blockConstruct) {
+ const parser::OmpBlockConstruct &blockConstruct) {
const parser::OmpDirectiveSpecification &beginSpec =
blockConstruct.BeginDir();
List<Clause> clauses = makeClauses(beginSpec.Clauses(), semaCtx);
@@ -3917,8 +3844,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v;
switch (nestedDirective) {
case llvm::omp::Directive::OMPD_tile:
- // Emit the omp.loop_nest with annotation for tiling
- genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value());
+ // Skip OMPD_tile since the tile sizes will be retrieved when
+ // generating the omp.loop_nest op.
break;
default: {
unsigned version = semaCtx.langOptions().OpenMPVersion;
@@ -3958,9 +3885,12 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
const auto &endSectionsDirective =
- std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
+ std::get<std::optional<parser::OmpEndSectionsDirective>>(
+ sectionsConstruct.t);
+ assert(endSectionsDirective &&
+ "Missing end section directive should have been handled in semantics");
clauses.append(makeClauses(
- std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
+ std::get<parser::OmpClauseList>(endSectionsDirective->t), semaCtx));
mlir::Location currentLocation = converter.getCurrentLocation();
llvm::omp::Directive directive =
@@ -4028,18 +3958,6 @@ void Fortran::lower::genOpenMPSymbolProperties(
lower::genDeclareTargetIntGlobal(converter, var);
}
-int64_t
-Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
- for (const parser::OmpClause &clause : clauseList.v) {
- if (const auto &collapseClause =
- std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
- const auto *expr = semantics::GetExpr(collapseClause->v);
- return evaluate::ToInt64(*expr).value();
- }
- }
- return 1;
-}
-
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
const lower::pft::Variable &var) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -4060,8 +3978,9 @@ void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
firOpBuilder, currentLocation, commonValue.getType(), commonValue);
converter.bindSymbol(*common, commonThreadprivateValue);
// Generate the threadprivate value for the common block member.
- symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
- sym, commonThreadprivateValue);
+ symThreadprivateValue =
+ genCommonBlockMember(converter, currentLocation, sym,
+ commonThreadprivateValue, common->size());
} else if (!var.isGlobal()) {
// Non-global variable which can be in threadprivate directive must be one
// variable in main program, and it has implicit SAVE attribute. Take it as
@@ -4124,7 +4043,7 @@ void Fortran::lower::genDeclareTargetIntGlobal(
bool Fortran::lower::isOpenMPTargetConstruct(
const parser::OpenMPConstruct &omp) {
llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
- if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
+ if (const auto *block = std::get_if<parser::OmpBlockConstruct>(&omp.u)) {
dir = block->BeginDir().DirId();
} else if (const auto *loop =
std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {