diff options
Diffstat (limited to 'flang/lib/Lower')
| -rw-r--r-- | flang/lib/Lower/Allocatable.cpp | 37 | ||||
| -rw-r--r-- | flang/lib/Lower/Bridge.cpp | 8 | ||||
| -rw-r--r-- | flang/lib/Lower/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 2 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/DataSharingProcessor.cpp | 3 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/OpenMP.cpp | 62 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp | 236 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/PrivateReductionUtils.h | 51 | ||||
| -rw-r--r-- | flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 234 |
9 files changed, 397 insertions, 237 deletions
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index fb8380ac7e8c..dc135543fafc 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -22,12 +22,14 @@ #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/Runtime.h" #include "flang/Lower/StatementContext.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/FatalError.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Parser/parse-tree.h" @@ -452,6 +454,19 @@ private: alloc.getSymbol()); } + void setPinnedToFalse() { + if (!pinnedExpr) + return; + Fortran::lower::StatementContext stmtCtx; + mlir::Value pinned = + fir::getBase(converter.genExprAddr(loc, *pinnedExpr, stmtCtx)); + mlir::Location loc = pinned.getLoc(); + mlir::Value falseValue = builder.createBool(loc, false); + mlir::Value falseConv = builder.createConvert( + loc, fir::unwrapRefType(pinned.getType()), falseValue); + builder.create<fir::StoreOp>(loc, falseConv, pinned); + } + void genSimpleAllocation(const Allocation &alloc, const fir::MutableBoxValue &box) { bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol()); @@ -467,6 +482,7 @@ private: // can be validated. 
genInlinedAllocation(alloc, box); postAllocationAction(alloc); + setPinnedToFalse(); return; } @@ -480,11 +496,13 @@ private: genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); mlir::Value stat; - if (!isCudaSymbol) + if (!isCudaSymbol) { stat = genRuntimeAllocate(builder, loc, box, errorManager); - else + setPinnedToFalse(); + } else { stat = genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); + } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); postAllocationAction(alloc); errorManager.assignStat(builder, loc, stat); @@ -614,13 +632,16 @@ private: genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); mlir::Value stat; - if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) + if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) { stat = genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); - else if (isSource) - stat = genRuntimeAllocateSource(builder, loc, box, exv, errorManager); - else - stat = genRuntimeAllocate(builder, loc, box, errorManager); + } else { + if (isSource) + stat = genRuntimeAllocateSource(builder, loc, box, exv, errorManager); + else + stat = genRuntimeAllocate(builder, loc, box, errorManager); + setPinnedToFalse(); + } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); postAllocationAction(alloc); errorManager.assignStat(builder, loc, stat); @@ -1093,11 +1114,13 @@ void Fortran::lower::associateMutableBox( fir::FirOpBuilder &builder = converter.getFirOpBuilder(); if (Fortran::evaluate::UnwrapExpr<Fortran::evaluate::NullPointer>(source)) { fir::factory::disassociateMutableBox(builder, loc, box); + cuf::genPointerSync(box.getAddr(), builder); return; } if (converter.getLoweringOptions().getLowerToHighLevelFIR()) { fir::ExtendedValue rhs = converter.genExprAddr(loc, source, stmtCtx); fir::factory::associateMutableBox(builder, loc, box, rhs, lbounds); + cuf::genPointerSync(box.getAddr(), builder); return; } // The right hand side is not 
be evaluated into a temp. Array sections can diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index c7e2635230e9..37f51d74d23f 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -34,6 +34,7 @@ #include "flang/Lower/StatementContext.h" #include "flang/Lower/Support/Utils.h" #include "flang/Optimizer/Builder/BoxValue.h" +#include "flang/Optimizer/Builder/CUFCommon.h" #include "flang/Optimizer/Builder/Character.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/Assign.h" @@ -832,7 +833,11 @@ public: if_builder.end(); }, [&](const auto &) -> void { - if (skipDefaultInit) + // Always initialize allocatable component descriptor, even when the + // value is later copied from the host (e.g. firstprivate) because the + // assignment from the host to the copy will fail if the component + // descriptors are not initialized. + if (skipDefaultInit && !hlfir::mayHaveAllocatableComponent(hSymType)) return; // Initialize local/private derived types with default // initialization (Fortran 2023 section 11.1.7.5 and OpenMP 5.2 @@ -3952,6 +3957,7 @@ private: } else { fir::MutableBoxValue box = genExprMutableBox(loc, *expr); fir::factory::disassociateMutableBox(*builder, loc, box); + cuf::genPointerSync(box.getAddr(), *builder); } } } diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index ba6622d8504a..f57f0e7a77a0 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -29,6 +29,7 @@ add_flang_library(FortranLower OpenMP/DataSharingProcessor.cpp OpenMP/Decomposer.cpp OpenMP/OpenMP.cpp + OpenMP/PrivateReductionUtils.cpp OpenMP/ReductionProcessor.cpp OpenMP/Utils.cpp PFTBuilder.cpp diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index c4ab5e0033d0..fb8e007c7af5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -613,6 +613,8 @@ 
addAlignedClause(lower::AbstractConverter &converter, // Do not generate alignment assumption if alignment is less than or equal to // 0. if (alignment > 0) { + // alignment value must be power of 2 + assert((alignment & (alignment - 1)) == 0 && "alignment is not power of 2"); auto &objects = std::get<omp::ObjectList>(clause.t); if (!objects.empty()) genObjectList(objects, converter, alignedVars); diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index cd312537551e..9dfdbd8337ae 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -126,7 +126,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) { assert(sb); mlir::Value addr = sb.getAddr(); assert(addr); - return hlfir::mayHaveAllocatableComponent(addr.getType()); + return !fir::isPointerType(addr.getType()) && + hlfir::mayHaveAllocatableComponent(addr.getType()); }; if (needInitClone()) { diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index b07e89d201d1..cd4b25a17722 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -923,13 +923,24 @@ static void genBodyOfTargetOp( while (!valuesDefinedAbove.empty()) { for (mlir::Value val : valuesDefinedAbove) { mlir::Operation *valOp = val.getDefiningOp(); - if (mlir::isMemoryEffectFree(valOp)) { + assert(valOp != nullptr); + + // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the + // indices separately, as the alternative is to eventually map the Box, + // which comes with a fairly large overhead comparatively. We could be + // more robust about this and check using a BackwardsSlice to see if we + // run the risk of mapping a box. 
+ if (mlir::isMemoryEffectFree(valOp) && + !mlir::isa<fir::BoxDimsOp>(valOp)) { mlir::Operation *clonedOp = valOp->clone(); entryBlock->push_front(clonedOp); - val.replaceUsesWithIf(clonedOp->getResult(0), - [entryBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == entryBlock; - }); + + auto replace = [entryBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == entryBlock; + }; + + valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace); + valOp->replaceUsesWithIf(clonedOp, replace); } else { auto savedIP = firOpBuilder.getInsertionPoint(); firOpBuilder.setInsertionPointAfter(valOp); @@ -937,9 +948,36 @@ static void genBodyOfTargetOp( firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - llvm::SmallVector<mlir::Value> bounds; + lower::AddrAndBoundsInfo info = lower::getDataOperandBaseAddr( + firOpBuilder, val, /*isOptional=*/false, val.getLoc()); + llvm::SmallVector<mlir::Value> bounds = + Fortran::lower::genImplicitBoundsOps<mlir::omp::MapBoundsOp, + mlir::omp::MapBoundsType>( + firOpBuilder, info, + hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder, + hlfir::Entity{val}) + .first, + /*dataExvIsAssumedSize=*/false, val.getLoc()); + std::stringstream name; firOpBuilder.setInsertionPoint(targetOp); + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + mlir::Type eleType = copyVal.getType(); + if (auto refType = + mlir::dyn_cast<fir::ReferenceType>(copyVal.getType())) + eleType = refType.getElementType(); + + if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + captureKind = mlir::omp::VariableCaptureKind::ByCopy; + } else if (!fir::isa_builtin_cptr_type(eleType)) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + } + mlir::Value mapOp = createMapInfoOp( firOpBuilder, 
copyVal.getLoc(), copyVal, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, @@ -947,8 +985,8 @@ static void genBodyOfTargetOp( /*membersIndex=*/mlir::ArrayAttr{}, static_cast< std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + mapFlag), + captureKind, copyVal.getType()); // Get the index of the first non-map argument before modifying mapVars, // then append an element to mapVars and an associated entry block @@ -2586,6 +2624,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, //===----------------------------------------------------------------------===// // OpenMPDeclarativeConstruct visitors //===----------------------------------------------------------------------===// +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPUtilityConstruct &); static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, @@ -2907,8 +2949,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - const parser::OpenMPErrorConstruct &) { - TODO(converter.getCurrentLocation(), "OpenMPErrorConstruct"); + const parser::OpenMPUtilityConstruct &) { + TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp new file mode 100644 index 000000000000..83f0d4e93ca5 --- /dev/null +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp @@ -0,0 +1,236 @@ +//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===// 
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "PrivateReductionUtils.h" + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/FatalError.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Location.h" + +static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Type argType, + mlir::Region &cleanupRegion) { + assert(cleanupRegion.empty()); + mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(), + {argType}, {loc}); + builder.setInsertionPointToEnd(block); + + auto typeError = [loc]() { + fir::emitFatalError(loc, + "Attempt to create an omp cleanup region " + "for a type that wasn't allocated", + /*genCrashDiag=*/true); + }; + + mlir::Type valTy = fir::unwrapRefType(argType); + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) { + if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) { + mlir::Type innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa<fir::SequenceType>(innerTy)) + typeError(); + } + + mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0)); + assert(mlir::isa<fir::BaseBoxType>(arg.getType())); + + // Deallocate box + // The FIR type system doesn't necessarily know that this is a mutable box + // if we allocated the thread local array on the heap to avoid looped stack + // allocations. 
+ mlir::Value addr = + hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); + mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); + fir::IfOp ifOp = + builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + + mlir::Value cast = builder.createConvert( + loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); + builder.create<fir::FreeMemOp>(loc, cast); + + builder.setInsertionPointAfter(ifOp); + builder.create<mlir::omp::YieldOp>(loc); + return; + } + + typeError(); +} + +fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::Value box) { + fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>( + hlfir::getFortranElementOrSequenceType(box.getType())); + const unsigned rank = sequenceType.getDimension(); + llvm::SmallVector<mlir::Value> lbAndExtents; + lbAndExtents.reserve(rank * 2); + + mlir::Type idxTy = builder.getIndexType(); + for (unsigned i = 0; i < rank; ++i) { + // TODO: ideally we want to hoist box reads out of the critical section. 
+ // We could do this by having box dimensions in block arguments like + // OpenACC does + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); + auto dimInfo = + builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim); + lbAndExtents.push_back(dimInfo.getLowerBound()); + lbAndExtents.push_back(dimInfo.getExtent()); + } + + auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); + auto shapeShift = + builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); + return shapeShift; +} + +void Fortran::lower::omp::populateByRefInitAndCleanupRegions( + fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType, + mlir::Value scalarInitValue, mlir::Block *initBlock, + mlir::Value allocatedPrivVarArg, mlir::Value moldArg, + mlir::Region &cleanupRegion) { + mlir::Type ty = fir::unwrapRefType(argType); + builder.setInsertionPointToEnd(initBlock); + auto yield = [&](mlir::Value ret) { + builder.create<mlir::omp::YieldOp>(loc, ret); + }; + + if (fir::isa_trivial(ty)) { + builder.setInsertionPointToEnd(initBlock); + + if (scalarInitValue) + builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg); + yield(allocatedPrivVarArg); + return; + } + + // check if an allocatable box is unallocated. If so, initialize the boxAlloca + // to be unallocated e.g. + // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> + // %addr = fir.box_addr %box + // if (%addr == 0) { + // %nullbox = fir.embox %addr + // fir.store %nullbox to %box_alloca + // } else { + // // ... 
+ // fir.store %something to %box_alloca + // } + // omp.yield %box_alloca + moldArg = builder.loadIfRef(loc, moldArg); + auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { + mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg); + mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); + fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + // just embox the null address and return + mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr); + builder.create<fir::StoreOp>(loc, nullBox, boxAlloca); + return ifOp; + }; + + // all arrays are boxed + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { + bool isAllocatableOrPointer = + mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy()); + + builder.setInsertionPointToEnd(initBlock); + mlir::Value boxAlloca = allocatedPrivVarArg; + mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); + if (fir::isa_trivial(innerTy)) { + // boxed non-sequence value e.g. 
!fir.box<!fir.heap<i32>> + if (!isAllocatableOrPointer) + TODO(loc, + "Reduction/Privatization of non-allocatable trivial typed box"); + + fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); + + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); + mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy); + if (scalarInitValue) + builder.createStoreWithConvert(loc, scalarInitValue, valAlloc); + mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc); + builder.create<fir::StoreOp>(loc, box, boxAlloca); + + createCleanupRegion(builder, loc, argType, cleanupRegion); + builder.setInsertionPointAfter(ifUnallocated); + yield(boxAlloca); + return; + } + innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa<fir::SequenceType>(innerTy)) + TODO(loc, "Unsupported boxed type for reduction/privatization"); + + fir::IfOp ifUnallocated{nullptr}; + if (isAllocatableOrPointer) { + ifUnallocated = handleNullAllocatable(boxAlloca); + builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); + } + + // Create the private copy from the initial fir.box: + mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); + hlfir::Entity source = hlfir::Entity{loadedBox}; + + // Allocating on the heap in case the whole reduction is nested inside of a + // loop + // TODO: compare performance here to using allocas - this could be made to + // work by inserting stacksave/stackrestore around the reduction in + // openmpirbuilder + auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); + // if needsDealloc isn't statically false, add cleanup region. 
Always + // do this for allocatable boxes because they might have been re-allocated + // in the body of the loop/parallel region + + std::optional<int64_t> cstNeedsDealloc = + fir::getIntIfConstant(needsDealloc); + assert(cstNeedsDealloc.has_value() && + "createTempFromMold decides this statically"); + if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { + mlir::OpBuilder::InsertionGuard guard(builder); + createCleanupRegion(builder, loc, argType, cleanupRegion); + } else { + assert(!isAllocatableOrPointer && + "Pointer-like arrays must be heap allocated"); + } + + // Put the temporary inside of a box: + // hlfir::genVariableBox doesn't handle non-default lower bounds + mlir::Value box; + fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); + mlir::Type boxType = loadedBox.getType(); + if (mlir::isa<fir::BaseBoxType>(temp.getType())) + // the box created by the declare from createTempFromMold is missing lower + // bounds info + box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift, + /*shift=*/mlir::Value{}); + else + box = builder.create<fir::EmboxOp>( + loc, boxType, temp, shapeShift, + /*slice=*/mlir::Value{}, + /*typeParams=*/llvm::ArrayRef<mlir::Value>{}); + + if (scalarInitValue) + builder.create<hlfir::AssignOp>(loc, scalarInitValue, box); + builder.create<fir::StoreOp>(loc, box, boxAlloca); + if (ifUnallocated) + builder.setInsertionPointAfter(ifUnallocated); + yield(boxAlloca); + return; + } + + TODO(loc, + "creating reduction/privatization init region for unsupported type"); + return; +} diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h new file mode 100644 index 000000000000..b4abc40cd4b6 --- /dev/null +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h @@ -0,0 +1,51 @@ +//===-- Lower/OpenMP/PrivateReductionUtils.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H +#define FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H + +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" + +namespace mlir { +class Region; +} // namespace mlir + +namespace fir { +class FirOpBuilder; +class ShapeShiftOp; +} // namespace fir + +namespace Fortran { +namespace lower { +namespace omp { + +/// Generate init and cleanup regions suitable for reduction or privatizer +/// declarations. `scalarInitValue` may be nullptr if there is no default +/// initialization (for privatization). +void populateByRefInitAndCleanupRegions(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type argType, + mlir::Value scalarInitValue, + mlir::Block *initBlock, + mlir::Value allocatedPrivVarArg, + mlir::Value moldArg, + mlir::Region &cleanupRegion); + +/// Generate a fir::ShapeShift op describing the provided boxed array. 
+fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value box); + +} // namespace omp +} // namespace lower +} // namespace Fortran + +#endif // FORTRAN_LOWER_OPENMP_PRIVATEREDUCTIONUTILS_H diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 736de2ee511b..2cd21107a916 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -12,6 +12,7 @@ #include "ReductionProcessor.h" +#include "PrivateReductionUtils.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/SymbolMap.h" @@ -294,33 +295,6 @@ mlir::Value ReductionProcessor::createScalarCombiner( return reductionOp; } -/// Generate a fir::ShapeShift op describing the provided boxed array. -static fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value box) { - fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>( - hlfir::getFortranElementOrSequenceType(box.getType())); - const unsigned rank = sequenceType.getDimension(); - llvm::SmallVector<mlir::Value> lbAndExtents; - lbAndExtents.reserve(rank * 2); - - mlir::Type idxTy = builder.getIndexType(); - for (unsigned i = 0; i < rank; ++i) { - // TODO: ideally we want to hoist box reads out of the critical section. 
- // We could do this by having box dimensions in block arguments like - // OpenACC does - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = - builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim); - lbAndExtents.push_back(dimInfo.getLowerBound()); - lbAndExtents.push_back(dimInfo.getExtent()); - } - - auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); - auto shapeShift = - builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); - return shapeShift; -} - /// Create reduction combiner region for reduction variables which are boxed /// arrays static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, @@ -422,59 +396,6 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); } -static void -createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::omp::DeclareReductionOp &reductionDecl) { - mlir::Type redTy = reductionDecl.getType(); - - mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion(); - assert(cleanupRegion.empty()); - mlir::Block *block = - builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc}); - builder.setInsertionPointToEnd(block); - - auto typeError = [loc]() { - fir::emitFatalError(loc, - "Attempt to create an omp reduction cleanup region " - "for a type that wasn't allocated", - /*genCrashDiag=*/true); - }; - - mlir::Type valTy = fir::unwrapRefType(redTy); - if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) { - if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) { - mlir::Type innerTy = fir::extractSequenceType(boxTy); - if (!mlir::isa<fir::SequenceType>(innerTy)) - typeError(); - } - - mlir::Value arg = block->getArgument(0); - arg = builder.loadIfRef(loc, arg); - assert(mlir::isa<fir::BaseBoxType>(arg.getType())); - - // Deallocate box - // The FIR type system doesn't nesecarrily know 
that this is a mutable box - // if we allocated the thread local array on the heap to avoid looped stack - // allocations. - mlir::Value addr = - hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); - mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); - fir::IfOp ifOp = - builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - mlir::Value cast = builder.createConvert( - loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); - builder.create<fir::FreeMemOp>(loc, cast); - - builder.setInsertionPointAfter(ifOp); - builder.create<mlir::omp::YieldOp>(loc); - return; - } - - typeError(); -} - // like fir::unwrapSeqOrBoxedSeqType except it also works for non-sequence boxes static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) { if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) @@ -517,154 +438,31 @@ static void createReductionAllocAndInitRegions( mlir::Value initValue = ReductionProcessor::getReductionInitValue( loc, unwrapSeqOrBoxedType(ty), redId, builder); + if (isByRef) { + populateByRefInitAndCleanupRegions(builder, loc, type, initValue, initBlock, + reductionDecl.getInitializerAllocArg(), + reductionDecl.getInitializerMoldArg(), + reductionDecl.getCleanupRegion()); + } + if (fir::isa_trivial(ty)) { if (isByRef) { // alloc region - { - builder.setInsertionPointToEnd(allocBlock); - mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty); - yield(alloca); - } - - // init region - { - builder.setInsertionPointToEnd(initBlock); - // block arg is mapped to the alloca yielded from the alloc region - mlir::Value alloc = reductionDecl.getInitializerAllocArg(); - builder.createStoreWithConvert(loc, initValue, alloc); - yield(alloc); - } + builder.setInsertionPointToEnd(allocBlock); + mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty); + yield(alloca); return; } // by val yield(initValue); return; } + assert(isByRef && "passing 
non-trivial types by val is unsupported"); - // check if an allocatable box is unallocated. If so, initialize the boxAlloca - // to be unallocated e.g. - // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> - // %addr = fir.box_addr %box - // if (%addr == 0) { - // %nullbox = fir.embox %addr - // fir.store %nullbox to %box_alloca - // } else { - // // ... - // fir.store %something to %box_alloca - // } - // omp.yield %box_alloca - mlir::Value moldArg = - builder.loadIfRef(loc, reductionDecl.getInitializerMoldArg()); - auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { - mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg); - mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); - fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated, - /*withElseRegion=*/true); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - // just embox the null address and return - mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr); - builder.create<fir::StoreOp>(loc, nullBox, boxAlloca); - return ifOp; - }; - - // all arrays are boxed - if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { - assert(isByRef && "passing boxes by value is unsupported"); - bool isAllocatableOrPointer = - mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy()); - - // alloc region - { - builder.setInsertionPointToEnd(allocBlock); - mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty); - yield(boxAlloca); - } - - // init region - builder.setInsertionPointToEnd(initBlock); - mlir::Value boxAlloca = reductionDecl.getInitializerAllocArg(); - mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); - if (fir::isa_trivial(innerTy)) { - // boxed non-sequence value e.g. 
!fir.box<!fir.heap<i32>> - if (!isAllocatableOrPointer) - TODO(loc, "Reduction of non-allocatable trivial typed box"); - - fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); - - builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); - mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy); - builder.createStoreWithConvert(loc, initValue, valAlloc); - mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc); - builder.create<fir::StoreOp>(loc, box, boxAlloca); - - auto insPt = builder.saveInsertionPoint(); - createReductionCleanupRegion(builder, loc, reductionDecl); - builder.restoreInsertionPoint(insPt); - builder.setInsertionPointAfter(ifUnallocated); - yield(boxAlloca); - return; - } - innerTy = fir::extractSequenceType(boxTy); - if (!mlir::isa<fir::SequenceType>(innerTy)) - TODO(loc, "Unsupported boxed type for reduction"); - - fir::IfOp ifUnallocated{nullptr}; - if (isAllocatableOrPointer) { - ifUnallocated = handleNullAllocatable(boxAlloca); - builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); - } - - // Create the private copy from the initial fir.box: - mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); - hlfir::Entity source = hlfir::Entity{loadedBox}; - - // Allocating on the heap in case the whole reduction is nested inside of a - // loop - // TODO: compare performance here to using allocas - this could be made to - // work by inserting stacksave/stackrestore around the reduction in - // openmpirbuilder - auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); - // if needsDealloc isn't statically false, add cleanup region. 
Always - // do this for allocatable boxes because they might have been re-allocated - // in the body of the loop/parallel region - - std::optional<int64_t> cstNeedsDealloc = - fir::getIntIfConstant(needsDealloc); - assert(cstNeedsDealloc.has_value() && - "createTempFromMold decides this statically"); - if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { - mlir::OpBuilder::InsertionGuard guard(builder); - createReductionCleanupRegion(builder, loc, reductionDecl); - } else { - assert(!isAllocatableOrPointer && - "Pointer-like arrays must be heap allocated"); - } - - // Put the temporary inside of a box: - // hlfir::genVariableBox doesn't handle non-default lower bounds - mlir::Value box; - fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); - mlir::Type boxType = loadedBox.getType(); - if (mlir::isa<fir::BaseBoxType>(temp.getType())) - // the box created by the declare form createTempFromMold is missing lower - // bounds info - box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift, - /*shift=*/mlir::Value{}); - else - box = builder.create<fir::EmboxOp>( - loc, boxType, temp, shapeShift, - /*slice=*/mlir::Value{}, - /*typeParams=*/llvm::ArrayRef<mlir::Value>{}); - - builder.create<hlfir::AssignOp>(loc, initValue, box); - builder.create<fir::StoreOp>(loc, box, boxAlloca); - if (ifUnallocated) - builder.setInsertionPointAfter(ifUnallocated); - yield(boxAlloca); - return; - } - - TODO(loc, "createReductionInitRegion for unsupported type"); + // alloc region + builder.setInsertionPointToEnd(allocBlock); + mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty); + yield(boxAlloca); } mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( |
