summaryrefslogtreecommitdiff
path: root/flang/lib/Lower
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Lower')
-rw-r--r--flang/lib/Lower/Allocatable.cpp37
-rw-r--r--flang/lib/Lower/Bridge.cpp8
-rw-r--r--flang/lib/Lower/CMakeLists.txt1
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.cpp2
-rw-r--r--flang/lib/Lower/OpenMP/DataSharingProcessor.cpp3
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp62
-rw-r--r--flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp236
-rw-r--r--flang/lib/Lower/OpenMP/PrivateReductionUtils.h51
-rw-r--r--flang/lib/Lower/OpenMP/ReductionProcessor.cpp234
9 files changed, 397 insertions, 237 deletions
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index fb8380ac7e8c..dc135543fafc 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -22,12 +22,14 @@
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/Runtime.h"
#include "flang/Lower/StatementContext.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Parser/parse-tree.h"
@@ -452,6 +454,19 @@ private:
alloc.getSymbol());
}
+ void setPinnedToFalse() {
+ if (!pinnedExpr)
+ return;
+ Fortran::lower::StatementContext stmtCtx;
+ mlir::Value pinned =
+ fir::getBase(converter.genExprAddr(loc, *pinnedExpr, stmtCtx));
+ mlir::Location loc = pinned.getLoc();
+ mlir::Value falseValue = builder.createBool(loc, false);
+ mlir::Value falseConv = builder.createConvert(
+ loc, fir::unwrapRefType(pinned.getType()), falseValue);
+ builder.create<fir::StoreOp>(loc, falseConv, pinned);
+ }
+
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
@@ -467,6 +482,7 @@ private:
// can be validated.
genInlinedAllocation(alloc, box);
postAllocationAction(alloc);
+ setPinnedToFalse();
return;
}
@@ -480,11 +496,13 @@ private:
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (!isCudaSymbol)
+ if (!isCudaSymbol) {
stat = genRuntimeAllocate(builder, loc, box, errorManager);
- else
+ setPinnedToFalse();
+ } else {
stat =
genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
+ }
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
postAllocationAction(alloc);
errorManager.assignStat(builder, loc, stat);
@@ -614,13 +632,16 @@ private:
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol()))
+ if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) {
stat =
genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
- else if (isSource)
- stat = genRuntimeAllocateSource(builder, loc, box, exv, errorManager);
- else
- stat = genRuntimeAllocate(builder, loc, box, errorManager);
+ } else {
+ if (isSource)
+ stat = genRuntimeAllocateSource(builder, loc, box, exv, errorManager);
+ else
+ stat = genRuntimeAllocate(builder, loc, box, errorManager);
+ setPinnedToFalse();
+ }
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
postAllocationAction(alloc);
errorManager.assignStat(builder, loc, stat);
@@ -1093,11 +1114,13 @@ void Fortran::lower::associateMutableBox(
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (Fortran::evaluate::UnwrapExpr<Fortran::evaluate::NullPointer>(source)) {
fir::factory::disassociateMutableBox(builder, loc, box);
+ cuf::genPointerSync(box.getAddr(), builder);
return;
}
if (converter.getLoweringOptions().getLowerToHighLevelFIR()) {
fir::ExtendedValue rhs = converter.genExprAddr(loc, source, stmtCtx);
fir::factory::associateMutableBox(builder, loc, box, rhs, lbounds);
+ cuf::genPointerSync(box.getAddr(), builder);
return;
}
// The right hand side is not be evaluated into a temp. Array sections can
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index c7e2635230e9..37f51d74d23f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -34,6 +34,7 @@
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/Support/Utils.h"
#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
#include "flang/Optimizer/Builder/Character.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/Assign.h"
@@ -832,7 +833,11 @@ public:
if_builder.end();
},
[&](const auto &) -> void {
- if (skipDefaultInit)
+ // Always initialize allocatable component descriptor, even when the
+ // value is later copied from the host (e.g. firstprivate) because the
+ // assignment from the host to the copy will fail if the component
+ // descriptors are not initialized.
+ if (skipDefaultInit && !hlfir::mayHaveAllocatableComponent(hSymType))
return;
// Initialize local/private derived types with default
// initialization (Fortran 2023 section 11.1.7.5 and OpenMP 5.2
@@ -3952,6 +3957,7 @@ private:
} else {
fir::MutableBoxValue box = genExprMutableBox(loc, *expr);
fir::factory::disassociateMutableBox(*builder, loc, box);
+ cuf::genPointerSync(box.getAddr(), *builder);
}
}
}
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index ba6622d8504a..f57f0e7a77a0 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -29,6 +29,7 @@ add_flang_library(FortranLower
OpenMP/DataSharingProcessor.cpp
OpenMP/Decomposer.cpp
OpenMP/OpenMP.cpp
+ OpenMP/PrivateReductionUtils.cpp
OpenMP/ReductionProcessor.cpp
OpenMP/Utils.cpp
PFTBuilder.cpp
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index c4ab5e0033d0..fb8e007c7af5 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -613,6 +613,8 @@ addAlignedClause(lower::AbstractConverter &converter,
// Do not generate alignment assumption if alignment is less than or equal to
// 0.
if (alignment > 0) {
+ // alignment value must be power of 2
+ assert((alignment & (alignment - 1)) == 0 && "alignment is not power of 2");
auto &objects = std::get<omp::ObjectList>(clause.t);
if (!objects.empty())
genObjectList(objects, converter, alignedVars);
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index cd312537551e..9dfdbd8337ae 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -126,7 +126,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
assert(sb);
mlir::Value addr = sb.getAddr();
assert(addr);
- return hlfir::mayHaveAllocatableComponent(addr.getType());
+ return !fir::isPointerType(addr.getType()) &&
+ hlfir::mayHaveAllocatableComponent(addr.getType());
};
if (needInitClone()) {
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index b07e89d201d1..cd4b25a17722 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -923,13 +923,24 @@ static void genBodyOfTargetOp(
while (!valuesDefinedAbove.empty()) {
for (mlir::Value val : valuesDefinedAbove) {
mlir::Operation *valOp = val.getDefiningOp();
- if (mlir::isMemoryEffectFree(valOp)) {
+ assert(valOp != nullptr);
+
+ // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the
+ // indices separately, as the alternative is to eventually map the Box,
+ // which comes with a fairly large overhead comparatively. We could be
+ // more robust about this and check using a BackwardsSlice to see if we
+ // run the risk of mapping a box.
+ if (mlir::isMemoryEffectFree(valOp) &&
+ !mlir::isa<fir::BoxDimsOp>(valOp)) {
mlir::Operation *clonedOp = valOp->clone();
entryBlock->push_front(clonedOp);
- val.replaceUsesWithIf(clonedOp->getResult(0),
- [entryBlock](mlir::OpOperand &use) {
- return use.getOwner()->getBlock() == entryBlock;
- });
+
+ auto replace = [entryBlock](mlir::OpOperand &use) {
+ return use.getOwner()->getBlock() == entryBlock;
+ };
+
+ valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
+ valOp->replaceUsesWithIf(clonedOp, replace);
} else {
auto savedIP = firOpBuilder.getInsertionPoint();
firOpBuilder.setInsertionPointAfter(valOp);
@@ -937,9 +948,36 @@ static void genBodyOfTargetOp(
firOpBuilder.createTemporary(val.getLoc(), val.getType());
firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
- llvm::SmallVector<mlir::Value> bounds;
+ lower::AddrAndBoundsInfo info = lower::getDataOperandBaseAddr(
+ firOpBuilder, val, /*isOptional=*/false, val.getLoc());
+ llvm::SmallVector<mlir::Value> bounds =
+ Fortran::lower::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+ mlir::omp::MapBoundsType>(
+ firOpBuilder, info,
+ hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
+ hlfir::Entity{val})
+ .first,
+ /*dataExvIsAssumedSize=*/false, val.getLoc());
+
std::stringstream name;
firOpBuilder.setInsertionPoint(targetOp);
+
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::VariableCaptureKind captureKind =
+ mlir::omp::VariableCaptureKind::ByRef;
+
+ mlir::Type eleType = copyVal.getType();
+ if (auto refType =
+ mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
+ eleType = refType.getElementType();
+
+ if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+ captureKind = mlir::omp::VariableCaptureKind::ByCopy;
+ } else if (!fir::isa_builtin_cptr_type(eleType)) {
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ }
+
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
@@ -947,8 +985,8 @@ static void genBodyOfTargetOp(
/*membersIndex=*/mlir::ArrayAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
- mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
+ mapFlag),
+ captureKind, copyVal.getType());
// Get the index of the first non-map argument before modifying mapVars,
// then append an element to mapVars and an associated entry block
@@ -2586,6 +2624,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
// OpenMPDeclarativeConstruct visitors
//===----------------------------------------------------------------------===//
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ const parser::OpenMPUtilityConstruct &);
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
@@ -2907,8 +2949,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
- const parser::OpenMPErrorConstruct &) {
- TODO(converter.getCurrentLocation(), "OpenMPErrorConstruct");
+ const parser::OpenMPUtilityConstruct &) {
+ TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
new file mode 100644
index 000000000000..83f0d4e93ca5
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
@@ -0,0 +1,236 @@
+//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "PrivateReductionUtils.h"
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/FatalError.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/Location.h"
+
+static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Type argType,
+ mlir::Region &cleanupRegion) {
+ assert(cleanupRegion.empty());
+ mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
+ {argType}, {loc});
+ builder.setInsertionPointToEnd(block);
+
+ auto typeError = [loc]() {
+ fir::emitFatalError(loc,
+ "Attempt to create an omp cleanup region "
+ "for a type that wasn't allocated",
+ /*genCrashDiag=*/true);
+ };
+
+ mlir::Type valTy = fir::unwrapRefType(argType);
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
+ if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) {
+ mlir::Type innerTy = fir::extractSequenceType(boxTy);
+ if (!mlir::isa<fir::SequenceType>(innerTy))
+ typeError();
+ }
+
+ mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0));
+ assert(mlir::isa<fir::BaseBoxType>(arg.getType()));
+
+ // Deallocate box
+ // The FIR type system doesn't nesecarrily know that this is a mutable box
+ // if we allocated the thread local array on the heap to avoid looped stack
+ // allocations.
+ mlir::Value addr =
+ hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
+ mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
+ fir::IfOp ifOp =
+ builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+
+ mlir::Value cast = builder.createConvert(
+ loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
+ builder.create<fir::FreeMemOp>(loc, cast);
+
+ builder.setInsertionPointAfter(ifOp);
+ builder.create<mlir::omp::YieldOp>(loc);
+ return;
+ }
+
+ typeError();
+}
+
+fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value box) {
+ fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
+ hlfir::getFortranElementOrSequenceType(box.getType()));
+ const unsigned rank = sequenceType.getDimension();
+ llvm::SmallVector<mlir::Value> lbAndExtents;
+ lbAndExtents.reserve(rank * 2);
+
+ mlir::Type idxTy = builder.getIndexType();
+ for (unsigned i = 0; i < rank; ++i) {
+ // TODO: ideally we want to hoist box reads out of the critical section.
+ // We could do this by having box dimensions in block arguments like
+ // OpenACC does
+ mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+ auto dimInfo =
+ builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
+ lbAndExtents.push_back(dimInfo.getLowerBound());
+ lbAndExtents.push_back(dimInfo.getExtent());
+ }
+
+ auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
+ auto shapeShift =
+ builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
+ return shapeShift;
+}
+
+void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
+ fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType,
+ mlir::Value scalarInitValue, mlir::Block *initBlock,
+ mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
+ mlir::Region &cleanupRegion) {
+ mlir::Type ty = fir::unwrapRefType(argType);
+ builder.setInsertionPointToEnd(initBlock);
+ auto yield = [&](mlir::Value ret) {
+ builder.create<mlir::omp::YieldOp>(loc, ret);
+ };
+
+ if (fir::isa_trivial(ty)) {
+ builder.setInsertionPointToEnd(initBlock);
+
+ if (scalarInitValue)
+ builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg);
+ yield(allocatedPrivVarArg);
+ return;
+ }
+
+ // check if an allocatable box is unallocated. If so, initialize the boxAlloca
+ // to be unallocated e.g.
+ // %box_alloca = fir.alloca !fir.box<!fir.heap<...>>
+ // %addr = fir.box_addr %box
+ // if (%addr == 0) {
+ // %nullbox = fir.embox %addr
+ // fir.store %nullbox to %box_alloca
+ // } else {
+ // // ...
+ // fir.store %something to %box_alloca
+ // }
+ // omp.yield %box_alloca
+ moldArg = builder.loadIfRef(loc, moldArg);
+ auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp {
+ mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg);
+ mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
+ fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
+ /*withElseRegion=*/true);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ // just embox the null address and return
+ mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr);
+ builder.create<fir::StoreOp>(loc, nullBox, boxAlloca);
+ return ifOp;
+ };
+
+ // all arrays are boxed
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
+ bool isAllocatableOrPointer =
+ mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());
+
+ builder.setInsertionPointToEnd(initBlock);
+ mlir::Value boxAlloca = allocatedPrivVarArg;
+ mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
+ if (fir::isa_trivial(innerTy)) {
+ // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>>
+ if (!isAllocatableOrPointer)
+ TODO(loc,
+ "Reduction/Privatization of non-allocatable trivial typed box");
+
+ fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca);
+
+ builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
+ mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy);
+ if (scalarInitValue)
+ builder.createStoreWithConvert(loc, scalarInitValue, valAlloc);
+ mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc);
+ builder.create<fir::StoreOp>(loc, box, boxAlloca);
+
+ createCleanupRegion(builder, loc, argType, cleanupRegion);
+ builder.setInsertionPointAfter(ifUnallocated);
+ yield(boxAlloca);
+ return;
+ }
+ innerTy = fir::extractSequenceType(boxTy);
+ if (!mlir::isa<fir::SequenceType>(innerTy))
+ TODO(loc, "Unsupported boxed type for reduction/privatization");
+
+ fir::IfOp ifUnallocated{nullptr};
+ if (isAllocatableOrPointer) {
+ ifUnallocated = handleNullAllocatable(boxAlloca);
+ builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
+ }
+
+ // Create the private copy from the initial fir.box:
+ mlir::Value loadedBox = builder.loadIfRef(loc, moldArg);
+ hlfir::Entity source = hlfir::Entity{loadedBox};
+
+ // Allocating on the heap in case the whole reduction is nested inside of a
+ // loop
+ // TODO: compare performance here to using allocas - this could be made to
+ // work by inserting stacksave/stackrestore around the reduction in
+ // openmpirbuilder
+ auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
+ // if needsDealloc isn't statically false, add cleanup region. Always
+ // do this for allocatable boxes because they might have been re-allocated
+ // in the body of the loop/parallel region
+
+ std::optional<int64_t> cstNeedsDealloc =
+ fir::getIntIfConstant(needsDealloc);
+ assert(cstNeedsDealloc.has_value() &&
+ "createTempFromMold decides this statically");
+ if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ createCleanupRegion(builder, loc, argType, cleanupRegion);
+ } else {
+ assert(!isAllocatableOrPointer &&
+ "Pointer-like arrays must be heap allocated");
+ }
+
+ // Put the temporary inside of a box:
+ // hlfir::genVariableBox doesn't handle non-default lower bounds
+ mlir::Value box;
+ fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox);
+ mlir::Type boxType = loadedBox.getType();
+ if (mlir::isa<fir::BaseBoxType>(temp.getType()))
+ // the box created by the declare form createTempFromMold is missing lower
+ // bounds info
+ box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift,
+ /*shift=*/mlir::Value{});
+ else
+ box = builder.create<fir::EmboxOp>(
+ loc, boxType, temp, shapeShift,
+ /*slice=*/mlir::Value{},
+ /*typeParams=*/llvm::ArrayRef<mlir::Value>{});
+
+ if (scalarInitValue)
+ builder.create<hlfir::AssignOp>(loc, scalarInitValue, box);
+ builder.create<fir::StoreOp>(loc, box, boxAlloca);
+ if (ifUnallocated)
+ builder.setInsertionPointAfter(ifUnallocated);
+ yield(boxAlloca);
+ return;
+ }
+
+ TODO(loc,
+ "creating reduction/privatization init region for unsupported type");
+ return;
+}
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
new file mode 100644
index 000000000000..b4abc40cd4b6
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
@@ -0,0 +1,51 @@
+//===-- Lower/OpenMP/PrivateReductionUtils.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H
+#define FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H
+
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Value.h"
+
+namespace mlir {
+class Region;
+} // namespace mlir
+
+namespace fir {
+class FirOpBuilder;
+class ShapeShiftOp;
+} // namespace fir
+
+namespace Fortran {
+namespace lower {
+namespace omp {
+
+/// Generate init and cleanup regions suitable for reduction or privatizer
+/// declarations. `scalarInitValue` may be nullptr if there is no default
+/// initialization (for privatization).
+void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Type argType,
+ mlir::Value scalarInitValue,
+ mlir::Block *initBlock,
+ mlir::Value allocatedPrivVarArg,
+ mlir::Value moldArg,
+ mlir::Region &cleanupRegion);
+
+/// Generate a fir::ShapeShift op describing the provided boxed array.
+fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value box);
+
+} // namespace omp
+} // namespace lower
+} // namespace Fortran
+
+#endif // FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 736de2ee511b..2cd21107a916 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -12,6 +12,7 @@
#include "ReductionProcessor.h"
+#include "PrivateReductionUtils.h"
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/SymbolMap.h"
@@ -294,33 +295,6 @@ mlir::Value ReductionProcessor::createScalarCombiner(
return reductionOp;
}
-/// Generate a fir::ShapeShift op describing the provided boxed array.
-static fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder,
- mlir::Location loc, mlir::Value box) {
- fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
- hlfir::getFortranElementOrSequenceType(box.getType()));
- const unsigned rank = sequenceType.getDimension();
- llvm::SmallVector<mlir::Value> lbAndExtents;
- lbAndExtents.reserve(rank * 2);
-
- mlir::Type idxTy = builder.getIndexType();
- for (unsigned i = 0; i < rank; ++i) {
- // TODO: ideally we want to hoist box reads out of the critical section.
- // We could do this by having box dimensions in block arguments like
- // OpenACC does
- mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
- auto dimInfo =
- builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
- lbAndExtents.push_back(dimInfo.getLowerBound());
- lbAndExtents.push_back(dimInfo.getExtent());
- }
-
- auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
- auto shapeShift =
- builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
- return shapeShift;
-}
-
/// Create reduction combiner region for reduction variables which are boxed
/// arrays
static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
@@ -422,59 +396,6 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
TODO(loc, "OpenMP genCombiner for unsupported reduction variable type");
}
-static void
-createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
- mlir::omp::DeclareReductionOp &reductionDecl) {
- mlir::Type redTy = reductionDecl.getType();
-
- mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion();
- assert(cleanupRegion.empty());
- mlir::Block *block =
- builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc});
- builder.setInsertionPointToEnd(block);
-
- auto typeError = [loc]() {
- fir::emitFatalError(loc,
- "Attempt to create an omp reduction cleanup region "
- "for a type that wasn't allocated",
- /*genCrashDiag=*/true);
- };
-
- mlir::Type valTy = fir::unwrapRefType(redTy);
- if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
- if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) {
- mlir::Type innerTy = fir::extractSequenceType(boxTy);
- if (!mlir::isa<fir::SequenceType>(innerTy))
- typeError();
- }
-
- mlir::Value arg = block->getArgument(0);
- arg = builder.loadIfRef(loc, arg);
- assert(mlir::isa<fir::BaseBoxType>(arg.getType()));
-
- // Deallocate box
- // The FIR type system doesn't nesecarrily know that this is a mutable box
- // if we allocated the thread local array on the heap to avoid looped stack
- // allocations.
- mlir::Value addr =
- hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
- mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
- fir::IfOp ifOp =
- builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
- builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-
- mlir::Value cast = builder.createConvert(
- loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
- builder.create<fir::FreeMemOp>(loc, cast);
-
- builder.setInsertionPointAfter(ifOp);
- builder.create<mlir::omp::YieldOp>(loc);
- return;
- }
-
- typeError();
-}
-
// like fir::unwrapSeqOrBoxedSeqType except it also works for non-sequence boxes
static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) {
if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty))
@@ -517,154 +438,31 @@ static void createReductionAllocAndInitRegions(
mlir::Value initValue = ReductionProcessor::getReductionInitValue(
loc, unwrapSeqOrBoxedType(ty), redId, builder);
+ if (isByRef) {
+ populateByRefInitAndCleanupRegions(builder, loc, type, initValue, initBlock,
+ reductionDecl.getInitializerAllocArg(),
+ reductionDecl.getInitializerMoldArg(),
+ reductionDecl.getCleanupRegion());
+ }
+
if (fir::isa_trivial(ty)) {
if (isByRef) {
// alloc region
- {
- builder.setInsertionPointToEnd(allocBlock);
- mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
- yield(alloca);
- }
-
- // init region
- {
- builder.setInsertionPointToEnd(initBlock);
- // block arg is mapped to the alloca yielded from the alloc region
- mlir::Value alloc = reductionDecl.getInitializerAllocArg();
- builder.createStoreWithConvert(loc, initValue, alloc);
- yield(alloc);
- }
+ builder.setInsertionPointToEnd(allocBlock);
+ mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
+ yield(alloca);
return;
}
// by val
yield(initValue);
return;
}
+ assert(isByRef && "passing non-trivial types by val is unsupported");
- // check if an allocatable box is unallocated. If so, initialize the boxAlloca
- // to be unallocated e.g.
- // %box_alloca = fir.alloca !fir.box<!fir.heap<...>>
- // %addr = fir.box_addr %box
- // if (%addr == 0) {
- // %nullbox = fir.embox %addr
- // fir.store %nullbox to %box_alloca
- // } else {
- // // ...
- // fir.store %something to %box_alloca
- // }
- // omp.yield %box_alloca
- mlir::Value moldArg =
- builder.loadIfRef(loc, reductionDecl.getInitializerMoldArg());
- auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp {
- mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg);
- mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
- fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
- /*withElseRegion=*/true);
- builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
- // just embox the null address and return
- mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr);
- builder.create<fir::StoreOp>(loc, nullBox, boxAlloca);
- return ifOp;
- };
-
- // all arrays are boxed
- if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
- assert(isByRef && "passing boxes by value is unsupported");
- bool isAllocatableOrPointer =
- mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());
-
- // alloc region
- {
- builder.setInsertionPointToEnd(allocBlock);
- mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty);
- yield(boxAlloca);
- }
-
- // init region
- builder.setInsertionPointToEnd(initBlock);
- mlir::Value boxAlloca = reductionDecl.getInitializerAllocArg();
- mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
- if (fir::isa_trivial(innerTy)) {
- // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>>
- if (!isAllocatableOrPointer)
- TODO(loc, "Reduction of non-allocatable trivial typed box");
-
- fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca);
-
- builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
- mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy);
- builder.createStoreWithConvert(loc, initValue, valAlloc);
- mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc);
- builder.create<fir::StoreOp>(loc, box, boxAlloca);
-
- auto insPt = builder.saveInsertionPoint();
- createReductionCleanupRegion(builder, loc, reductionDecl);
- builder.restoreInsertionPoint(insPt);
- builder.setInsertionPointAfter(ifUnallocated);
- yield(boxAlloca);
- return;
- }
- innerTy = fir::extractSequenceType(boxTy);
- if (!mlir::isa<fir::SequenceType>(innerTy))
- TODO(loc, "Unsupported boxed type for reduction");
-
- fir::IfOp ifUnallocated{nullptr};
- if (isAllocatableOrPointer) {
- ifUnallocated = handleNullAllocatable(boxAlloca);
- builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
- }
-
- // Create the private copy from the initial fir.box:
- mlir::Value loadedBox = builder.loadIfRef(loc, moldArg);
- hlfir::Entity source = hlfir::Entity{loadedBox};
-
- // Allocating on the heap in case the whole reduction is nested inside of a
- // loop
- // TODO: compare performance here to using allocas - this could be made to
- // work by inserting stacksave/stackrestore around the reduction in
- // openmpirbuilder
- auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
- // if needsDealloc isn't statically false, add cleanup region. Always
- // do this for allocatable boxes because they might have been re-allocated
- // in the body of the loop/parallel region
-
- std::optional<int64_t> cstNeedsDealloc =
- fir::getIntIfConstant(needsDealloc);
- assert(cstNeedsDealloc.has_value() &&
- "createTempFromMold decides this statically");
- if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
- mlir::OpBuilder::InsertionGuard guard(builder);
- createReductionCleanupRegion(builder, loc, reductionDecl);
- } else {
- assert(!isAllocatableOrPointer &&
- "Pointer-like arrays must be heap allocated");
- }
-
- // Put the temporary inside of a box:
- // hlfir::genVariableBox doesn't handle non-default lower bounds
- mlir::Value box;
- fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox);
- mlir::Type boxType = loadedBox.getType();
- if (mlir::isa<fir::BaseBoxType>(temp.getType()))
- // the box created by the declare form createTempFromMold is missing lower
- // bounds info
- box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift,
- /*shift=*/mlir::Value{});
- else
- box = builder.create<fir::EmboxOp>(
- loc, boxType, temp, shapeShift,
- /*slice=*/mlir::Value{},
- /*typeParams=*/llvm::ArrayRef<mlir::Value>{});
-
- builder.create<hlfir::AssignOp>(loc, initValue, box);
- builder.create<fir::StoreOp>(loc, box, boxAlloca);
- if (ifUnallocated)
- builder.setInsertionPointAfter(ifUnallocated);
- yield(boxAlloca);
- return;
- }
-
- TODO(loc, "createReductionInitRegion for unsupported type");
+ // alloc region
+ builder.setInsertionPointToEnd(allocBlock);
+ mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty);
+ yield(boxAlloca);
}
mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction(