diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /flang/lib/Lower | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-formatusers/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'flang/lib/Lower')
25 files changed, 661 insertions, 544 deletions
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 444b5b6c7c4b..53239cb83c6c 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -450,9 +450,6 @@ private: if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); - if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) - Fortran::lower::initializeDeviceComponentAllocator( - converter, alloc.getSymbol(), box); } void setPinnedToFalse() { diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index c003a5b04ecd..6125ea915366 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -72,6 +72,7 @@ #include "mlir/Parser/Parser.h" #include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" @@ -631,6 +632,17 @@ public: addSymbol(sym, exval, /*forced=*/true); } + void bindSymbolStorage( + Fortran::lower::SymbolRef sym, + Fortran::lower::SymMap::StorageDesc storage) override final { + localSymbols.registerStorage(sym, std::move(storage)); + } + + Fortran::lower::SymMap::StorageDesc + getSymbolStorage(Fortran::lower::SymbolRef sym) override final { + return localSymbols.lookupStorage(sym); + } + void overrideExprValues(const Fortran::lower::ExprToValueMap *map) override final { exprValueOverrides = map; @@ -2187,6 +2199,11 @@ private: // Loops with induction variables inside OpenACC compute constructs // need special handling to ensure that the IVs are privatized. if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) { + // Open up a new scope for the loop variables. + localSymbols.pushScope(); + auto scopeGuard = + llvm::make_scope_exit([&]() { localSymbols.popScope(); }); + mlir::Operation *loopOp = Fortran::lower::genOpenACCLoopFromDoConstruct( *this, bridge.getSemanticsContext(), localSymbols, doConstruct, eval); bool success = loopOp != nullptr; @@ -2203,6 +2220,8 @@ private: for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) genFIR(*iter, unstructuredContext); + + builder->setInsertionPointAfter(loopOp); return; } // Fall back to normal loop handling. diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 1d1c7ddda8e9..eb4d57d733dd 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -60,6 +60,7 @@ add_flang_library(FortranLower FortranParser FortranEvaluate FortranSemantics + FortranUtils LINK_COMPONENTS Support diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index 1293d2c5bd3a..bb4bdee78f97 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -17,95 +17,6 @@ #define DEBUG_TYPE "flang-lower-cuda" -void Fortran::lower::initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { - if (const auto *details{ - sym.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); - - // Only pointer and allocatable needs post allocation initialization - // of components descriptors. - if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) - return; - - // Extract the derived type. - mlir::Type ty = fir::getDerivedType(baseTy); - auto recTy = mlir::dyn_cast<fir::RecordType>(ty); - assert(recTy && "expected fir::RecordType"); - - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); - mlir::Value addr; - if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) { - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - llvm::SmallVector<fir::DoLoopOp> loops; - llvm::SmallVector<mlir::Value> indices; - llvm::SmallVector<mlir::Value> extents; - for (unsigned i = 0; i < seqTy.getDimension(); ++i) { - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, - idxTy, loadedBox, dim); - mlir::Value lbub = mlir::arith::AddIOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); - mlir::Value ext = - mlir::arith::SubIOp::create(builder, loc, lbub, one); - mlir::Value cmp = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); - ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); - extents.push_back(ext); - - auto loop = fir::DoLoopOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), - dimInfo.getResult(2), /*isUnordered=*/true, - /*finalCount=*/false, mlir::ValueRange{}); - loops.push_back(loop); - indices.push_back(loop.getInductionVar()); - builder.setInsertionPointToStart(loop.getBody()); - } - mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); - auto shape = fir::ShapeOp::create(builder, loc, extents); - addr = fir::ArrayCoorOp::create( - builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, - /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); - } else { - addr = fir::BoxAddrOp::create(builder, loc, loadedBox); - } - for (const auto &compSym : components) { - if (Fortran::semantics::IsDeviceAllocatable(compSym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( - builder, loc, compSym, recTy, coord); - assert(coord.size() == 1 && "expect one coordinate"); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, coord[0]); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), compSym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 72431a9cfacc..c3284cd936f8 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -1766,6 +1766,17 @@ mlir::Type Fortran::lower::getDummyProcedureType( return procType; } +mlir::Type Fortran::lower::getDummyProcedurePointerType( + const Fortran::semantics::Symbol &dummyProcPtr, + Fortran::lower::AbstractConverter &converter) { + std::optional<Fortran::evaluate::characteristics::Procedure> iface = + Fortran::evaluate::characteristics::Procedure::Characterize( + dummyProcPtr, converter.getFoldingContext()); + mlir::Type procPtrType = getProcedureDesignatorType( + iface.has_value() ? &*iface : nullptr, converter); + return fir::ReferenceType::get(procPtrType); +} + bool Fortran::lower::isCPtrArgByValueType(mlir::Type ty) { return mlir::isa<fir::ReferenceType>(ty) && fir::isa_integer(fir::unwrapRefType(ty)); diff --git a/flang/lib/Lower/ConvertArrayConstructor.cpp b/flang/lib/Lower/ConvertArrayConstructor.cpp index 87824110b4a0..006f022b5379 100644 --- a/flang/lib/Lower/ConvertArrayConstructor.cpp +++ b/flang/lib/Lower/ConvertArrayConstructor.cpp @@ -315,9 +315,8 @@ public: mlir::Value tempStorage = builder.createHeapTemporary( loc, declaredType, tempName, extents, lengths); mlir::Value shape = builder.genShape(loc, extents); - declare = hlfir::DeclareOp::create( - builder, loc, tempStorage, tempName, shape, lengths, - /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); + declare = hlfir::DeclareOp::create(builder, loc, tempStorage, tempName, + shape, lengths); initialBoxValue = builder.createBox(loc, boxType, declare->getOriginalBase(), shape, /*slice=*/mlir::Value{}, lengths, /*tdesc=*/{}); diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index bf713f5a0bc4..3951401ebed3 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -17,6 +17,7 @@ #include "flang/Lower/ConvertVariable.h" #include "flang/Lower/CustomIntrinsicCall.h" #include "flang/Lower/HlfirIntrinsics.h" +#include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" @@ -287,6 +288,16 @@ static void remapActualToDummyDescriptors( } } +static void +getResultLengthFromElementalOp(fir::FirOpBuilder &builder, + llvm::SmallVectorImpl<mlir::Value> &lengths) { + auto elemental = llvm::dyn_cast_or_null<hlfir::ElementalOp>( + builder.getInsertionBlock()->getParentOp()); + if (elemental) + for (mlir::Value len : elemental.getTypeparams()) + lengths.push_back(len); +} + std::pair<Fortran::lower::LoweredResult, bool> Fortran::lower::genCallOpAndResult( mlir::Location loc, Fortran::lower::AbstractConverter &converter, @@ -296,7 +307,13 @@ Fortran::lower::genCallOpAndResult( fir::FirOpBuilder &builder = converter.getFirOpBuilder(); using PassBy = Fortran::lower::CallerInterface::PassEntityBy; bool mustPopSymMap = false; - if (caller.mustMapInterfaceSymbolsForResult()) { + + llvm::SmallVector<mlir::Value> resultLengths; + if (isElemental) + getResultLengthFromElementalOp(builder, resultLengths); + if (caller.mustMapInterfaceSymbolsForResult() && resultLengths.empty()) { + // Do not map the dummy symbols again inside the loop to compute elemental + // function result whose length was already computed outside of the loop. symMap.pushScope(); mustPopSymMap = true; Fortran::lower::mapCallInterfaceSymbolsForResult(converter, caller, symMap); @@ -340,7 +357,6 @@ Fortran::lower::genCallOpAndResult( loc, idxTy, fir::getBase(converter.genExprValue(expr, stmtCtx))); return fir::factory::genMaxWithZero(builder, loc, convertExpr); }; - llvm::SmallVector<mlir::Value> resultLengths; mlir::Value arrayResultShape; hlfir::EvaluateInMemoryOp evaluateInMemory; auto allocatedResult = [&]() -> std::optional<fir::ExtendedValue> { @@ -355,11 +371,16 @@ Fortran::lower::genCallOpAndResult( assert(!isAssumedSizeExtent && "result cannot be assumed-size"); extents.emplace_back(lowerSpecExpr(e)); }); - caller.walkResultLengths( - [&](const Fortran::lower::SomeExpr &e, bool isAssumedSizeExtent) { - assert(!isAssumedSizeExtent && "result cannot be assumed-size"); - lengths.emplace_back(lowerSpecExpr(e)); - }); + if (resultLengths.empty()) { + caller.walkResultLengths( + [&](const Fortran::lower::SomeExpr &e, bool isAssumedSizeExtent) { + assert(!isAssumedSizeExtent && "result cannot be assumed-size"); + lengths.emplace_back(lowerSpecExpr(e)); + }); + } else { + // Use lengths precomputed before elemental loops. + lengths = resultLengths; + } // Result length parameters should not be provided to box storage // allocation and save_results, but they are still useful information to @@ -494,10 +515,19 @@ Fortran::lower::genCallOpAndResult( // arguments of any type and vice versa. mlir::Value cast; auto *context = builder.getContext(); - if (mlir::isa<fir::BoxProcType>(snd) && - mlir::isa<mlir::FunctionType>(fst.getType())) { - auto funcTy = mlir::FunctionType::get(context, {}, {}); - auto boxProcTy = builder.getBoxProcType(funcTy); + + // Special handling for %VAL arguments: internal procedures expect + // reference parameters. When %VAL is used, the argument should be + // passed by value. Pass the originally loaded value. + if (fir::isa_ref_type(snd) && !fir::isa_ref_type(fst.getType()) && + fir::dyn_cast_ptrEleTy(snd) == fst.getType()) { + auto loadOp = mlir::cast<fir::LoadOp>(fst.getDefiningOp()); + mlir::Value originalStorage = loadOp.getMemref(); + cast = originalStorage; + } else if (mlir::isa<fir::BoxProcType>(snd) && + mlir::isa<mlir::FunctionType>(fst.getType())) { + mlir::FunctionType funcTy = mlir::FunctionType::get(context, {}, {}); + fir::BoxProcType boxProcTy = builder.getBoxProcType(funcTy); if (mlir::Value host = argumentHostAssocs(converter, fst)) { cast = fir::EmboxProcOp::create(builder, loc, boxProcTy, llvm::ArrayRef<mlir::Value>{fst, host}); @@ -880,9 +910,10 @@ struct CallContext { std::optional<mlir::Type> resultType, mlir::Location loc, Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symMap, - Fortran::lower::StatementContext &stmtCtx) + Fortran::lower::StatementContext &stmtCtx, bool doCopyIn = true) : procRef{procRef}, converter{converter}, symMap{symMap}, - stmtCtx{stmtCtx}, resultType{resultType}, loc{loc} {} + stmtCtx{stmtCtx}, resultType{resultType}, loc{loc}, doCopyIn{doCopyIn} { + } fir::FirOpBuilder &getBuilder() { return converter.getFirOpBuilder(); } @@ -924,6 +955,7 @@ struct CallContext { Fortran::lower::StatementContext &stmtCtx; std::optional<mlir::Type> resultType; mlir::Location loc; + bool doCopyIn; }; using ExvAndCleanup = @@ -1161,18 +1193,6 @@ mlir::Value static getZeroLowerBounds(mlir::Location loc, return builder.genShift(loc, lowerBounds); } -static bool -isSimplyContiguous(const Fortran::evaluate::ActualArgument &arg, - Fortran::evaluate::FoldingContext &foldingContext) { - if (const auto *expr = arg.UnwrapExpr()) - return Fortran::evaluate::IsSimplyContiguous(*expr, foldingContext); - const Fortran::semantics::Symbol *sym = arg.GetAssumedTypeDummy(); - assert(sym && - "expect ActualArguments to be expression or assumed-type symbols"); - return sym->Rank() == 0 || - Fortran::evaluate::IsSimplyContiguous(*sym, foldingContext); -} - static bool isParameterObjectOrSubObject(hlfir::Entity entity) { mlir::Value base = entity; bool foundParameter = false; @@ -1204,6 +1224,10 @@ static bool isParameterObjectOrSubObject(hlfir::Entity entity) { /// fir.box_char...). /// This function should only be called with an actual that is present. /// The optional aspects must be handled by this function user. +/// +/// Note: while Fortran::lower::CallerInterface::PassedEntity (the type of arg) +/// is technically a template type, in the prepare*ActualArgument() calls +/// it resolves to Fortran::evaluate::ActualArgument * static PreparedDummyArgument preparePresentUserCallActualArgument( mlir::Location loc, fir::FirOpBuilder &builder, const Fortran::lower::PreparedActualArgument &preparedActual, @@ -1211,9 +1235,6 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( const Fortran::lower::CallerInterface::PassedEntity &arg, CallContext &callContext) { - Fortran::evaluate::FoldingContext &foldingContext = - callContext.converter.getFoldingContext(); - // Step 1: get the actual argument, which includes addressing the // element if this is an array in an elemental call. hlfir::Entity actual = preparedActual.getActual(loc, builder); @@ -1254,13 +1275,20 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( passingPolymorphicToNonPolymorphic && (actual.isArray() || mlir::isa<fir::BaseBoxType>(dummyType)); - // The simple contiguity of the actual is "lost" when passing a polymorphic - // to a non polymorphic entity because the dummy dynamic type matters for - // the contiguity. - const bool mustDoCopyInOut = - actual.isArray() && arg.mustBeMadeContiguous() && - (passingPolymorphicToNonPolymorphic || - !isSimplyContiguous(*arg.entity, foldingContext)); + bool mustDoCopyIn{false}; + bool mustDoCopyOut{false}; + + if (callContext.doCopyIn) { + Fortran::evaluate::FoldingContext &foldingContext{ + callContext.converter.getFoldingContext()}; + + bool suggestCopyIn = Fortran::evaluate::MayNeedCopy( + arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/false); + bool suggestCopyOut = Fortran::evaluate::MayNeedCopy( + arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/true); + mustDoCopyIn = actual.isArray() && suggestCopyIn; + mustDoCopyOut = actual.isArray() && suggestCopyOut; + } const bool actualIsAssumedRank = actual.isAssumedRank(); // Create dummy type with actual argument rank when the dummy is an assumed @@ -1370,8 +1398,14 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( entity = hlfir::Entity{associate.getBase()}; // Register the temporary destruction after the call. preparedDummy.pushExprAssociateCleanUp(associate); - } else if (mustDoCopyInOut) { + } else if (mustDoCopyIn || mustDoCopyOut) { // Copy-in non contiguous variables. + // + // TODO: copy-in and copy-out are now determined separately, in order + // to allow more fine grained copying. While currently both copy-in + // and copy-out are must be done together, these copy operations could + // be separated in the future. (This is related to TODO comment below.) + // // TODO: for non-finalizable monomorphic derived type actual // arguments associated with INTENT(OUT) dummy arguments // we may avoid doing the copy and only allocate the temporary. @@ -1379,7 +1413,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( // allocation for the temp in this case. We can communicate // this to the codegen via some CopyInOp flag. // This is a performance concern. - entity = genCopyIn(entity, arg.mayBeModifiedByCall()); + entity = genCopyIn(entity, mustDoCopyOut); } } else { const Fortran::lower::SomeExpr *expr = arg.entity->UnwrapExpr(); @@ -1633,7 +1667,19 @@ void prepareUserCallArguments( (*cleanup)(); break; } - caller.placeInput(arg, builder.createConvert(loc, argTy, value)); + // For %VAL arguments, we should pass the value directly without + // conversion to reference types. If argTy is different from value type, + // it might be due to signature mismatch with internal procedures. + if (argTy == value.getType()) + caller.placeInput(arg, value); + else if (fir::isa_ref_type(argTy) && + fir::dyn_cast_ptrEleTy(argTy) == value.getType()) { + auto loadOp = mlir::cast<fir::LoadOp>(value.getDefiningOp()); + mlir::Value originalStorage = loadOp.getMemref(); + caller.placeInput(arg, originalStorage); + } else + caller.placeInput(arg, builder.createConvert(loc, argTy, value)); + } break; case PassBy::BaseAddressValueAttribute: case PassBy::CharBoxValueAttribute: @@ -2168,10 +2214,15 @@ static std::optional<hlfir::EntityWithAttributes> genHLFIRIntrinsicRefCore( const std::string intrinsicName = callContext.getProcedureName(); const fir::IntrinsicArgumentLoweringRules *argLowering = intrinsicEntry.getArgumentLoweringRules(); + mlir::Type resultType = + callContext.isElementalProcWithArrayArgs() + ? hlfir::getFortranElementType(*callContext.resultType) + : *callContext.resultType; + std::optional<hlfir::EntityWithAttributes> res = Fortran::lower::lowerHlfirIntrinsic(builder, loc, intrinsicName, loweredActuals, argLowering, - *callContext.resultType); + resultType); if (res) return res; } @@ -2326,6 +2377,47 @@ private: } }; +/// Helper for computing elemental function result specification +/// expressions that depends on dummy symbols. See +/// computeDynamicCharacterResultLength below. +static mlir::Value genMockDummyForElementalResultSpecifications( + fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type dummyType, + Fortran::lower::PreparedActualArgument &preparedActual) { + // One is used as the mock address instead of NULL so that PRESENT inquires + // work (this is the only valid thing that specification can do with the + // address thanks to Fortran 2023 C15121). + mlir::Value one = + builder.createIntegerConstant(loc, builder.getIntPtrType(), 1); + if (auto boxCharType = llvm::dyn_cast<fir::BoxCharType>(dummyType)) { + mlir::Value addr = builder.createConvert( + loc, fir::ReferenceType::get(boxCharType.getEleTy()), one); + mlir::Value len = preparedActual.genCharLength(loc, builder); + return fir::EmboxCharOp::create(builder, loc, boxCharType, addr, len); + } + if (auto box = llvm::dyn_cast<fir::BaseBoxType>(dummyType)) { + mlir::Value addr = + builder.createConvert(loc, box.getBaseAddressType(), one); + llvm::SmallVector<mlir::Value> lenParams; + preparedActual.genLengthParameters(loc, builder, lenParams); + mlir::Value mold; + if (fir::isPolymorphicType(box)) + mold = preparedActual.getPolymorphicMold(loc); + return fir::EmboxOp::create(builder, loc, box, addr, + /*shape=*/mlir::Value{}, + /*slice=*/mlir::Value{}, lenParams, mold); + } + // Values of arguments should not be used in elemental procedure specification + // expressions as per C15121, so it makes no sense to have a specification + // expression requiring a symbol that is passed by value (there is no good + // value to create here). + assert(fir::isa_ref_type(dummyType) && + (fir::isa_trivial(fir::unwrapRefType(dummyType)) || + fir::isa_char(fir::unwrapRefType(dummyType))) && + "Only expect symbols inquired in elemental procedure result " + "specifications to be passed in memory"); + return builder.createConvert(loc, dummyType, one); +} + class ElementalUserCallBuilder : public ElementalCallBuilder<ElementalUserCallBuilder> { public: @@ -2358,29 +2450,97 @@ public: mlir::Value computeDynamicCharacterResultLength( Fortran::lower::PreparedActualArguments &loweredActuals, CallContext &callContext) { + fir::FirOpBuilder &builder = callContext.getBuilder(); mlir::Location loc = callContext.loc; auto &converter = callContext.converter; - mlir::Type idxTy = builder.getIndexType(); - llvm::SmallVector<CallCleanUp> callCleanUps; - prepareUserCallArguments(loweredActuals, caller, callSiteType, callContext, - callCleanUps); + // Gather the dummy argument symbols required directly or indirectly to + // evaluate the result symbol specification expressions. + llvm::SmallPtrSet<const Fortran::semantics::Symbol *, 4> + requiredDummySymbols; + const Fortran::semantics::Symbol &result = caller.getResultSymbol(); + for (Fortran::lower::pft::Variable var : + Fortran::lower::pft::getDependentVariableList(result)) + if (var.hasSymbol()) { + const Fortran::semantics::Symbol &sym = var.getSymbol(); + if (Fortran::semantics::IsDummy(sym) && sym.owner() == result.owner()) + requiredDummySymbols.insert(&sym); + } - callContext.symMap.pushScope(); + // Prepare mock FIR arguments for each dummy arguments required in the + // result specifications. These mock arguments will have the same properties + // (dynamic type and type parameters) as the actual arguments, except for + // the address. Such mock argument are needed because this evaluation is + // happening before the loop for the elemental call (the array result + // storage must be allocated before the loops if any is needed, so the + // result properties must be known before the loops). So it is not possible + // to just pick an element (like the first one) and use that because the + // normal argument preparation have effects (vector subscripted actual + // argument will require reading the vector subscript and VALUE arguments + // preparation involve copies of the data. This could cause segfaults in + // case of zero size arrays and is in general pointless extra computation + // since the data cannot be used in the specification expression as per + // C15121). + if (!requiredDummySymbols.empty()) { + const Fortran::semantics::SubprogramDetails *iface = + caller.getInterfaceDetails(); + assert(iface && "interface must be explicit when result specification " + "depends upon dummy symbols"); + for (auto [maybePreparedActual, arg, sym] : llvm::zip( + loweredActuals, caller.getPassedArguments(), iface->dummyArgs())) + if (requiredDummySymbols.contains(sym)) { + mlir::Type dummyType = callSiteType.getInput(arg.firArgument); + + if (!maybePreparedActual.has_value()) { + mlir::Value mockArgValue = + fir::AbsentOp::create(builder, loc, dummyType); + caller.placeInput(arg, mockArgValue); + continue; + } - // Map prepared argument to dummy symbol to be able to lower spec expr. - for (const auto &arg : caller.getPassedArguments()) { - const Fortran::semantics::Symbol *sym = caller.getDummySymbol(arg); - assert(sym && "expect symbol for dummy argument"); - auto input = caller.getInput(arg); - fir::ExtendedValue exv = Fortran::lower::translateToExtendedValue( - loc, builder, hlfir::Entity{input}, callContext.stmtCtx); - fir::FortranVariableOpInterface variableIface = hlfir::genDeclare( - loc, builder, exv, "dummy.tmp", fir::FortranVariableFlagsAttr{}); - callContext.symMap.addVariableDefinition(*sym, variableIface); + Fortran::lower::PreparedActualArgument &preparedActual = + maybePreparedActual.value(); + + if (preparedActual.handleDynamicOptional()) { + mlir::Value isPresent = preparedActual.getIsPresent(); + mlir::Value mockArgValue = + builder + .genIfOp(loc, {dummyType}, isPresent, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value mockArgValue = + genMockDummyForElementalResultSpecifications( + builder, loc, dummyType, preparedActual); + fir::ResultOp::create(builder, loc, mockArgValue); + }) + .genElse([&]() { + mlir::Value absent = + fir::AbsentOp::create(builder, loc, dummyType); + fir::ResultOp::create(builder, loc, absent); + }) + .getResults()[0]; + caller.placeInput(arg, mockArgValue); + } else { + mlir::Value mockArgValue = + genMockDummyForElementalResultSpecifications( + builder, loc, dummyType, preparedActual); + caller.placeInput(arg, mockArgValue); + } + } } + // Map symbols required by the result specification expressions to SSA + // values. This will both finish mapping the mock value created above if + // any, and deal with any module/common block variables accessed in the + // specification expressions. + // Map prepared argument to dummy symbol to be able to lower spec expr. + callContext.symMap.pushScope(); + Fortran::lower::mapCallInterfaceSymbolsForResult(converter, caller, + callContext.symMap); + + // Evaluate the result length expression. + mlir::Type idxTy = builder.getIndexType(); auto lowerSpecExpr = [&](const auto &expr) -> mlir::Value { mlir::Value convertExpr = builder.createConvert( loc, idxTy, @@ -2966,8 +3126,11 @@ void Fortran::lower::convertUserDefinedAssignmentToHLFIR( const evaluate::ProcedureRef &procRef, hlfir::Entity lhs, hlfir::Entity rhs, Fortran::lower::SymMap &symMap) { Fortran::lower::StatementContext definedAssignmentContext; + // For defined assignment, don't use regular copy-in/copy-out mechanism: + // defined assignment generates hlfir.region_assign construct, and this + // construct automatically handles any copy-in. CallContext callContext(procRef, /*resultType=*/std::nullopt, loc, converter, - symMap, definedAssignmentContext); + symMap, definedAssignmentContext, /*doCopyIn=*/false); Fortran::lower::CallerInterface caller(procRef, converter); mlir::FunctionType callSiteType = caller.genFunctionType(); PreparedActualArgument preparedLhs{lhs, /*isPresent=*/std::nullopt}; diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp index 768a237c9239..376ec12150c7 100644 --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -145,6 +145,9 @@ private: fir::FirOpBuilder &builder, const Fortran::evaluate::Constant<Fortran::evaluate::Type<TC, KIND>> &constant) { + using Element = + Fortran::evaluate::Scalar<Fortran::evaluate::Type<TC, KIND>>; + static_assert(TC != Fortran::common::TypeCategory::Character, "must be numerical or logical"); auto attrTc = TC == Fortran::common::TypeCategory::Logical @@ -152,7 +155,24 @@ private: : TC; attributeElementType = Fortran::lower::getFIRType(builder.getContext(), attrTc, KIND, {}); - for (auto element : constant.values()) + + const std::vector<Element> &values = constant.values(); + auto sameElements = [&]() -> bool { + if (values.empty()) + return false; + + return std::all_of(values.begin(), values.end(), + [&](const auto &v) { return v == values.front(); }); + }; + + if (sameElements()) { + auto attr = convertToAttribute<TC, KIND>(builder, values.front(), + attributeElementType); + attributes.assign(values.size(), attr); + return; + } + + for (auto element : values) attributes.push_back( convertToAttribute<TC, KIND>(builder, element, attributeElementType)); } diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 5588f62e7a0c..d7f94e1f7ca6 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -2750,7 +2750,7 @@ public: fir::unwrapSequenceType(fir::unwrapPassByRefType(argTy)))) TODO(loc, "passing to an OPTIONAL CONTIGUOUS derived type argument " "with length parameters"); - if (Fortran::evaluate::IsAssumedRank(*expr)) + if (Fortran::semantics::IsAssumedRank(*expr)) TODO(loc, "passing an assumed rank entity to an OPTIONAL " "CONTIGUOUS argument"); // Assumed shape VALUE are currently TODO in the call interface diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index 9930dd69e0c0..1eda1f1b6135 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -26,7 +26,6 @@ #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/IntrinsicCall.h" #include "flang/Optimizer/Builder/MutableBox.h" -#include "flang/Optimizer/Builder/Runtime/Character.h" #include "flang/Optimizer/Builder/Runtime/Derived.h" #include "flang/Optimizer/Builder/Runtime/Pointer.h" #include "flang/Optimizer/Builder/Todo.h" @@ -1286,16 +1285,8 @@ struct BinaryOp<Fortran::evaluate::Relational< fir::FirOpBuilder &builder, const Op &op, hlfir::Entity lhs, hlfir::Entity rhs) { - auto [lhsExv, lhsCleanUp] = - hlfir::translateToExtendedValue(loc, builder, lhs); - auto [rhsExv, rhsCleanUp] = - hlfir::translateToExtendedValue(loc, builder, rhs); - auto cmp = fir::runtime::genCharCompare( - builder, loc, translateSignedRelational(op.opr), lhsExv, rhsExv); - if (lhsCleanUp) - (*lhsCleanUp)(); - if (rhsCleanUp) - (*rhsCleanUp)(); + auto cmp = hlfir::CmpCharOp::create( + builder, loc, translateSignedRelational(op.opr), lhs, rhs); return hlfir::EntityWithAttributes{cmp}; } }; @@ -1822,10 +1813,8 @@ private: // Allocate scalar temporary that will be initialized // with the values specified by the constructor. mlir::Value storagePtr = builder.createTemporary(loc, recTy); - auto varOp = hlfir::EntityWithAttributes{hlfir::DeclareOp::create( - builder, loc, storagePtr, "ctor.temp", /*shape=*/nullptr, - /*typeparams=*/mlir::ValueRange{}, /*dummy_scope=*/nullptr, - fir::FortranVariableFlagsAttr{})}; + auto varOp = hlfir::EntityWithAttributes{ + hlfir::DeclareOp::create(builder, loc, storagePtr, "ctor.temp")}; // Initialize any components that need initialization. mlir::Value box = builder.createBox(loc, fir::ExtendedValue{varOp}); diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index fd66592bc285..ccfde16ce2c3 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, return res; } -/// Device allocatable components in a derived-type don't have the correct -/// allocator index in their descriptor when they are created. After -/// initialization, cuf.set_allocator_idx operations are inserted to set the -/// correct allocator index for each device component. -static void -initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &symbol, - Fortran::lower::SymMap &symMap) { - if (const auto *details{ - symbol.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - fir::ExtendedValue exv = - converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap); - mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType()); - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - if (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType())) - return; // Allocator index need to be set after allocation. - - auto recTy = - mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy)); - assert(recTy && "expected fir::RecordType"); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - for (const auto &sym : components) { - if (Fortran::semantics::IsDeviceAllocatable(sym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = - Fortran::lower::gatherDeviceComponentCoordinatesAndType( - builder, loc, sym, recTy, coord); - mlir::Value base = fir::getBase(exv); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coord); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), sym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - /// Must \p var be default initialized at runtime when entering its scope. static bool mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { @@ -898,7 +842,8 @@ void Fortran::lower::defaultInitializeAtRuntime( Fortran::semantics::DeclTypeSpec::Category::TypeDerived && !mlir::isa<fir::SequenceType>(symTy) && !sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) && - !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { + !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) && + !Fortran::semantics::HasCUDAComponent(sym)) { std::string globalName = fir::NameUniquer::doGenerated( (converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator + fir::kDerivedTypeInitSuffix) @@ -1164,9 +1109,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); auto *builder = &converter.getFirOpBuilder(); if (needCUDAAlloc(var.getSymbol()) && !cuf::isCUDADeviceContext(builder->getRegion())) { @@ -1413,6 +1355,7 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter, mlir::Value bytePtr = fir::CoordinateOp::create( builder, loc, i8Ptr, storeAddr, mlir::ValueRange{offset}); mlir::Value typedPtr = castAliasToPointer(builder, loc, symType, bytePtr); + converter.bindSymbolStorage(sym, {storeAddr, off}); Fortran::lower::StatementContext stmtCtx; mapSymbolAttributes(converter, var, symMap, stmtCtx, typedPtr); // Default initialization is possible for equivalence members: see @@ -1425,9 +1368,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); } //===--------------------------------------------------------------===// @@ -1655,13 +1595,15 @@ void Fortran::lower::defineCommonBlocks( mlir::Value Fortran::lower::genCommonBlockMember( Fortran::lower::AbstractConverter &converter, mlir::Location loc, - const Fortran::semantics::Symbol &sym, mlir::Value commonValue) { + const Fortran::semantics::Symbol &sym, mlir::Value commonValue, + std::size_t commonSize) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); std::size_t byteOffset = sym.GetUltimate().offset(); mlir::IntegerType i8Ty = builder.getIntegerType(8); mlir::Type i8Ptr = builder.getRefType(i8Ty); - mlir::Type seqTy = builder.getRefType(builder.getVarLenSeqTy(i8Ty)); + fir::SequenceType::Shape shape(1, commonSize); + mlir::Type seqTy = builder.getRefType(fir::SequenceType::get(shape, i8Ty)); mlir::Value base = builder.createConvert(loc, seqTy, commonValue); mlir::Value offs = @@ -1670,6 +1612,8 @@ mlir::Value Fortran::lower::genCommonBlockMember( mlir::ValueRange{offs}); mlir::Type symType = converter.genType(sym); + converter.bindSymbolStorage(sym, {base, byteOffset}); + return Fortran::semantics::FindEquivalenceSet(sym) != nullptr ? castAliasToPointer(builder, loc, symType, varAddr) : builder.createConvert(loc, builder.getRefType(symType), varAddr); @@ -1698,7 +1642,8 @@ static void instantiateCommon(Fortran::lower::AbstractConverter &converter, symMap.addSymbol(common, commonAddr); } - mlir::Value local = genCommonBlockMember(converter, loc, varSym, commonAddr); + mlir::Value local = + genCommonBlockMember(converter, loc, varSym, commonAddr, common.size()); Fortran::lower::StatementContext stmtCtx; mapSymbolAttributes(converter, var, symMap, stmtCtx, local); } @@ -1720,7 +1665,7 @@ static bool lowerToBoxValue(const Fortran::semantics::Symbol &sym, return true; // Assumed rank and optional fir.box cannot yet be read while lowering the // specifications. - if (Fortran::evaluate::IsAssumedRank(sym) || + if (Fortran::semantics::IsAssumedRank(sym) || Fortran::semantics::IsOptional(sym)) return true; // Polymorphic entity should be tracked through a fir.box that has the @@ -1970,7 +1915,8 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter, // Declare a local pointer variable. auto newBase = hlfir::DeclareOp::create( builder, loc, boxAlloc, name, /*shape=*/nullptr, lenParams, - /*dummy_scope=*/nullptr, attributes); + /*dummy_scope=*/nullptr, /*storage=*/nullptr, + /*storage_offset=*/0, attributes); mlir::Value nullAddr = builder.createNullConstant( loc, llvm::cast<fir::BaseBoxType>(ptrBoxType).getEleTy()); @@ -2000,9 +1946,10 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter, mlir::Value dummyScope; if (converter.isRegisteredDummySymbol(sym)) dummyScope = converter.dummyArgsScopeValue(); - auto newBase = - hlfir::DeclareOp::create(builder, loc, base, name, shapeOrShift, - lenParams, dummyScope, attributes, dataAttr); + auto [storage, storageOffset] = converter.getSymbolStorage(sym); + auto newBase = hlfir::DeclareOp::create( + builder, loc, base, name, shapeOrShift, lenParams, dummyScope, storage, + storageOffset, attributes, dataAttr); symMap.addVariableDefinition(sym, newBase, force); return; } @@ -2060,8 +2007,10 @@ void Fortran::lower::genDeclareSymbol( base = genPackArray(converter, sym, exv); dummyScope = converter.dummyArgsScopeValue(); } - hlfir::EntityWithAttributes declare = hlfir::genDeclare( - loc, builder, base, name, attributes, dummyScope, dataAttr); + auto [storage, storageOffset] = converter.getSymbolStorage(sym); + hlfir::EntityWithAttributes declare = + hlfir::genDeclare(loc, builder, base, name, attributes, dummyScope, + storage, storageOffset, dataAttr); symMap.addVariableDefinition(sym, declare.getIfVariableInterface(), force); return; } @@ -2149,15 +2098,19 @@ void Fortran::lower::mapSymbolAttributes( if (Fortran::semantics::IsProcedure(sym)) { if (isUnusedEntryDummy) { // Additional discussion below. - mlir::Type dummyProcType = - Fortran::lower::getDummyProcedureType(sym, converter); - mlir::Value undefOp = fir::UndefOp::create(builder, loc, dummyProcType); - - Fortran::lower::genDeclareSymbol(converter, symMap, sym, undefOp); - } - - // Procedure pointer. - if (Fortran::semantics::IsPointer(sym)) { + if (Fortran::semantics::IsPointer(sym)) { + mlir::Type procPtrType = + Fortran::lower::getDummyProcedurePointerType(sym, converter); + mlir::Value undefOp = fir::UndefOp::create(builder, loc, procPtrType); + genProcPointer(converter, symMap, sym, undefOp, replace); + } else { + mlir::Type dummyProcType = + Fortran::lower::getDummyProcedureType(sym, converter); + mlir::Value undefOp = fir::UndefOp::create(builder, loc, dummyProcType); + Fortran::lower::genDeclareSymbol(converter, symMap, sym, undefOp); + } + } else if (Fortran::semantics::IsPointer(sym)) { + // Used procedure pointer. // global mlir::Value boxAlloc = preAlloc; // dummy or passed result @@ -2172,7 +2125,7 @@ void Fortran::lower::mapSymbolAttributes( return; } - const bool isAssumedRank = Fortran::evaluate::IsAssumedRank(sym); + const bool isAssumedRank = Fortran::semantics::IsAssumedRank(sym); if (isAssumedRank && !allowAssumedRank) TODO(loc, "assumed-rank variable in procedure implemented in Fortran"); diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp index 39595d6f519a..27c8bb87f754 100644 --- a/flang/lib/Lower/HlfirIntrinsics.cpp +++ b/flang/lib/Lower/HlfirIntrinsics.cpp @@ -69,6 +69,11 @@ protected: mlir::Value loadBoxAddress( const std::optional<Fortran::lower::PreparedActualArgument> &arg); + mlir::Value + loadTrivialScalar(const Fortran::lower::PreparedActualArgument &arg); + + mlir::Value loadOptionalValue(Fortran::lower::PreparedActualArgument &arg); + void addCleanup(std::optional<hlfir::CleanupFunction> cleanup) { if (cleanup) cleanupFns.emplace_back(std::move(*cleanup)); @@ -159,6 +164,18 @@ protected: hlfir::CharExtremumPredicate pred; }; +class HlfirCharTrimLowering : public HlfirTransformationalIntrinsic { +public: + HlfirCharTrimLowering(fir::FirOpBuilder &builder, mlir::Location loc) + : HlfirTransformationalIntrinsic(builder, loc) {} + +protected: + mlir::Value + lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) override; +}; + class HlfirCShiftLowering : public HlfirTransformationalIntrinsic { public: using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; @@ -192,6 +209,17 @@ protected: mlir::Type stmtResultType) override; }; +class HlfirIndexLowering : public HlfirTransformationalIntrinsic { +public: + using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; + +protected: + mlir::Value + lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) override; +}; + } // namespace mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress( @@ -227,19 +255,22 @@ mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress( return boxOrAbsent; } -static mlir::Value loadOptionalValue( - mlir::Location loc, fir::FirOpBuilder &builder, - const std::optional<Fortran::lower::PreparedActualArgument> &arg, - hlfir::Entity actual) { - if (!arg->handleDynamicOptional()) - return hlfir::loadTrivialScalar(loc, builder, actual); +mlir::Value HlfirTransformationalIntrinsic::loadOptionalValue( + Fortran::lower::PreparedActualArgument &arg) { + mlir::Type eleType = arg.getFortranElementType(); - mlir::Value isPresent = arg->getIsPresent(); - mlir::Type eleType = hlfir::getFortranElementType(actual.getType()); + // For an elemental call, getActual() may produce + // a designator denoting the array element to be passed + // to the subprogram. If the actual array is dynamically + // optional the designator must be generated under + // isPresent check (see also genIntrinsicRefCore). return builder - .genIfOp(loc, {eleType}, isPresent, + .genIfOp(loc, {eleType}, arg.getIsPresent(), /*withElseRegion=*/true) .genThen([&]() { + hlfir::Entity actual = arg.getActual(loc, builder); + assert(eleType == actual.getFortranElementType() && + "result type mismatch in genOptionalValue"); assert(actual.isScalar() && fir::isa_trivial(eleType) && "must be a numerical or logical scalar"); hlfir::Entity val = hlfir::loadTrivialScalar(loc, builder, actual); @@ -252,6 +283,12 @@ static mlir::Value loadOptionalValue( .getResults()[0]; } +mlir::Value HlfirTransformationalIntrinsic::loadTrivialScalar( + const Fortran::lower::PreparedActualArgument &arg) { + hlfir::Entity actual = arg.getActual(loc, builder); + return hlfir::loadTrivialScalar(loc, builder, actual); +} + llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector( const Fortran::lower::PreparedActualArguments &loweredActuals, const fir::IntrinsicArgumentLoweringRules *argLowering) { @@ -265,29 +302,33 @@ llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector( operands.emplace_back(); continue; } - hlfir::Entity actual = arg->getActual(loc, builder); mlir::Value valArg; - if (!argLowering) { - valArg = hlfir::loadTrivialScalar(loc, builder, actual); - } else { - fir::ArgLoweringRule argRules = - fir::lowerIntrinsicArgumentAs(*argLowering, i); - if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box) - valArg = loadBoxAddress(arg); - else if (!argRules.handleDynamicOptional && - argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired) - valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual); - else if (argRules.handleDynamicOptional && - argRules.lowerAs == fir::LowerIntrinsicArgAs::Value) - valArg = loadOptionalValue(loc, builder, arg, actual); - else if (argRules.handleDynamicOptional) + valArg = loadTrivialScalar(*arg); + operands.emplace_back(valArg); + continue; + } + fir::ArgLoweringRule argRules = + fir::lowerIntrinsicArgumentAs(*argLowering, i); + if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Box) { + valArg = loadBoxAddress(arg); + } else if (argRules.handleDynamicOptional) { + if (argRules.lowerAs == fir::LowerIntrinsicArgAs::Value) { + if (arg->handleDynamicOptional()) + valArg = loadOptionalValue(*arg); + else + valArg = loadTrivialScalar(*arg); + } else { TODO(loc, "hlfir transformational intrinsic dynamically optional " "argument without box lowering"); + } + } else { + hlfir::Entity actual = arg->getActual(loc, builder); + if (argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired) + valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual); else valArg = actual.getBase(); } - operands.emplace_back(valArg); } return operands; @@ -421,6 +462,15 @@ mlir::Value HlfirCharExtremumLowering::lowerImpl( return createOp<hlfir::CharExtremumOp>(pred, mlir::ValueRange{operands}); } +mlir::Value HlfirCharTrimLowering::lowerImpl( + const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) { + auto operands = getOperandVector(loweredActuals, argLowering); + assert(operands.size() == 1); + return createOp<hlfir::CharTrimOp>(operands[0]); +} + mlir::Value HlfirCShiftLowering::lowerImpl( const Fortran::lower::PreparedActualArguments &loweredActuals, const fir::IntrinsicArgumentLoweringRules *argLowering, @@ -492,6 +542,22 @@ mlir::Value HlfirReshapeLowering::lowerImpl( operands[2], operands[3]); } +mlir::Value HlfirIndexLowering::lowerImpl( + const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) { + auto operands = getOperandVector(loweredActuals, argLowering); + // 'kind' optional operand is unused here as it has already been + // translated into result type. + assert(operands.size() == 4); + mlir::Value substr = operands[1]; + mlir::Value str = operands[0]; + mlir::Value back = operands[2]; + mlir::Value result = + createOp<hlfir::IndexOp>(stmtResultType, substr, str, back); + return result; +} + std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic( fir::FirOpBuilder &builder, mlir::Location loc, const std::string &name, const Fortran::lower::PreparedActualArguments &loweredActuals, @@ -546,6 +612,10 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic( if (name == "reshape") return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); + if (name == "index") + return HlfirIndexLowering{builder, loc}.lower(loweredActuals, argLowering, + stmtResultType); + if (mlir::isa<fir::CharacterType>(stmtResultType)) { if (name == "min") return HlfirCharExtremumLowering{builder, loc, @@ -555,6 +625,9 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic( return HlfirCharExtremumLowering{builder, loc, hlfir::CharExtremumPredicate::max} .lower(loweredActuals, argLowering, stmtResultType); + if (name == "trim") + return HlfirCharTrimLowering{builder, loc}.lower( + loweredActuals, argLowering, stmtResultType); } return std::nullopt; } diff --git a/flang/lib/Lower/HostAssociations.cpp b/flang/lib/Lower/HostAssociations.cpp index 2a330ccc4eeb..ad6aba1d28ae 100644 --- a/flang/lib/Lower/HostAssociations.cpp +++ b/flang/lib/Lower/HostAssociations.cpp @@ -431,7 +431,7 @@ public: mlir::Value box = args.valueInTuple; mlir::IndexType idxTy = builder.getIndexType(); llvm::SmallVector<mlir::Value> lbounds; - if (!ba.lboundIsAllOnes() && !Fortran::evaluate::IsAssumedRank(sym)) { + if (!ba.lboundIsAllOnes() && !Fortran::semantics::IsAssumedRank(sym)) { if (ba.isStaticArray()) { for (std::int64_t lb : ba.staticLBound()) lbounds.emplace_back(builder.createIntegerConstant(loc, idxTy, lb)); @@ -490,7 +490,7 @@ private: bool isPolymorphic = type && type->IsPolymorphic(); return isScalarOrContiguous && !isPolymorphic && !isDerivedWithLenParameters(sym) && - !Fortran::evaluate::IsAssumedRank(sym); + !Fortran::semantics::IsAssumedRank(sym); } }; } // namespace diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 35edcb0926b6..d8a0e4d8a8fa 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -61,6 +61,11 @@ static llvm::cl::opt<bool> strideIncludeLowerExtent( "Whether to include the lower dimensions extents in the stride."), llvm::cl::init(true)); +static llvm::cl::opt<bool> lowerDoLoopToAccLoop( + "openacc-do-loop-to-acc-loop", + llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."), + llvm::cl::init(true)); + // Special value for * passed in device_type or gang clauses. static constexpr std::int64_t starCst = -1; @@ -1212,12 +1217,10 @@ mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( auto leftDeclOp = hlfir::DeclareOp::create( builder, loc, recipe.getCopyRegion().getArgument(0), llvm::StringRef{}, - shape, llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr, - fir::FortranVariableFlagsAttr{}); + shape); auto rightDeclOp = hlfir::DeclareOp::create( builder, loc, recipe.getCopyRegion().getArgument(1), llvm::StringRef{}, - shape, llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr, - fir::FortranVariableFlagsAttr{}); + shape); hlfir::DesignateOp::Subscripts triplets = getSubscriptsFromArgs(recipe.getCopyRegion().getArguments()); @@ -1523,14 +1526,10 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, auto shape = genShapeFromBoundsOrArgs(loc, builder, seqTy, bounds, recipe.getCombinerRegion().getArguments()); - auto v1DeclareOp = hlfir::DeclareOp::create( - builder, loc, value1, llvm::StringRef{}, shape, - llvm::ArrayRef<mlir::Value>{}, - /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); - auto v2DeclareOp = hlfir::DeclareOp::create( - builder, loc, value2, llvm::StringRef{}, shape, - llvm::ArrayRef<mlir::Value>{}, - /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{}); + auto v1DeclareOp = hlfir::DeclareOp::create(builder, loc, value1, + llvm::StringRef{}, shape); + auto v2DeclareOp = hlfir::DeclareOp::create(builder, loc, value2, + llvm::StringRef{}, shape); hlfir::DesignateOp::Subscripts triplets = getTripletsFromArgs(recipe); llvm::SmallVector<mlir::Value> lenParamsLeft; @@ -1575,7 +1574,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, if (bounds.empty()) { llvm::SmallVector<mlir::Value> extents; mlir::Type idxTy = builder.getIndexType(); - for (auto extent : seqTy.getShape()) { + for (auto extent : llvm::reverse(seqTy.getShape())) { mlir::Value lb = mlir::arith::ConstantOp::create( builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0)); mlir::Value ub = mlir::arith::ConstantOp::create( @@ -1607,12 +1606,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, } } else { // Lowerbound, upperbound and step are passed as block arguments. - [[maybe_unused]] unsigned nbRangeArgs = + unsigned nbRangeArgs = recipe.getCombinerRegion().getArguments().size() - 2; assert((nbRangeArgs / 3 == seqTy.getDimension()) && "Expect 3 block arguments per dimension"); - for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size(); - i += 3) { + for (int i = nbRangeArgs - 1; i >= 2; i -= 3) { mlir::Value lb = recipe.getCombinerRegion().getArgument(i); mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1); mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2); @@ -1623,8 +1621,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ivs.push_back(loop.getInductionVar()); } } - auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs); - auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs); + llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend()); + auto addr1 = + fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs); + auto addr2 = + fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs); auto load1 = fir::LoadOp::create(builder, loc, addr1); auto load2 = fir::LoadOp::create(builder, loc, addr2); mlir::Value res = @@ -5009,6 +5010,9 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( Fortran::semantics::SemanticsContext &semanticsContext, Fortran::lower::SymMap &localSymbols, const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) { + if (!lowerDoLoopToAccLoop) + return nullptr; + // Only convert loops which have induction variables that need privatized. if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent()) return nullptr; @@ -5031,10 +5035,6 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( return nullptr; } - // Open up a new scope for the loop variables. - localSymbols.pushScope(); - auto scopeGuard = llvm::make_scope_exit([&]() { localSymbols.popScope(); }); - // Prepare empty operand vectors since there are no associated `acc loop` // clauses with the Fortran do loops being handled here. llvm::SmallVector<mlir::Value> privateOperands, gangOperands, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 5d19f589d79f..a96884f5680b 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -19,6 +19,7 @@ #include "flang/Lower/Support/ReductionProcessor.h" #include "flang/Parser/tools.h" #include "flang/Semantics/tools.h" +#include "flang/Utils/OpenMP.h" #include "llvm/Frontend/OpenMP/OMP.h.inc" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" @@ -272,10 +273,15 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const { - return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, - result, iv); + + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, loopResult, iv); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + return numCollapse > 1; } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, @@ -521,6 +527,13 @@ bool ClauseProcessor::processProcBind( return false; } +bool ClauseProcessor::processTileSizes( + lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { + auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}; + collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); + return !result.tileSizes.empty(); +} + bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause<omp::clause::Safelen>()) { @@ -647,10 +660,8 @@ addAlignedClause(lower::AbstractConverter &converter, // The default alignment for some targets is equal to 0. // Do not generate alignment assumption if alignment is less than or equal to - // 0. - if (alignment > 0) { - // alignment value must be power of 2 - assert((alignment & (alignment - 1)) == 0 && "alignment is not power of 2"); + // 0 or not a power of two + if (alignment > 0 && ((alignment & (alignment - 1)) == 0)) { auto &objects = std::get<omp::ObjectList>(clause.t); if (!objects.empty()) genObjectList(objects, converter, alignedVars); @@ -846,10 +857,12 @@ createCopyFunc(mlir::Location loc, lower::AbstractConverter &converter, } auto declDst = hlfir::DeclareOp::create( builder, loc, dst, copyFuncName + "_dst", shape, typeparams, - /*dummy_scope=*/nullptr, attrs); + /*dummy_scope=*/nullptr, /*storage=*/nullptr, + /*storage_offset=*/0, attrs); auto declSrc = hlfir::DeclareOp::create( builder, loc, src, copyFuncName + "_src", shape, typeparams, - /*dummy_scope=*/nullptr, attrs); + /*dummy_scope=*/nullptr, /*storage=*/nullptr, + /*storage_offset=*/0, attrs); converter.copyVar(loc, declDst.getBase(), declSrc.getBase(), varAttrs); mlir::func::ReturnOp::create(builder, loc); return funcOp; @@ -1281,7 +1294,7 @@ void ClauseProcessor::processMapObjects( auto location = mlir::NameLoc::get( mlir::StringAttr::get(firOpBuilder.getContext(), asFortran.str()), baseOp.getLoc()); - mlir::omp::MapInfoOp mapOp = createMapInfoOp( + mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds, /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c46bdb348a3e..324ea3c1047a 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,7 +63,8 @@ public: mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; @@ -98,6 +99,8 @@ public: bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + bool processTileSizes(lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 1a16e1c87e25..cecc1a939589 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -221,6 +221,8 @@ MAKE_EMPTY_CLASS(Capture, Capture); MAKE_EMPTY_CLASS(Compare, Compare); MAKE_EMPTY_CLASS(DynamicAllocators, DynamicAllocators); MAKE_EMPTY_CLASS(Full, Full); +MAKE_EMPTY_CLASS(GraphId, GraphId); +MAKE_EMPTY_CLASS(GraphReset, GraphReset); MAKE_EMPTY_CLASS(Inbranch, Inbranch); MAKE_EMPTY_CLASS(Mergeable, Mergeable); MAKE_EMPTY_CLASS(Nogroup, Nogroup); diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 9d1c730b38ed..146a252b049e 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -30,6 +30,7 @@ #include "flang/Semantics/tools.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Frontend/OpenMP/OMP.h" #include <variant> namespace Fortran { @@ -89,13 +90,14 @@ DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter, isTargetPrivatization) {} void DataSharingProcessor::processStep1( - mlir::omp::PrivateClauseOps *clauseOps) { + mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir) { collectSymbolsForPrivatization(); collectDefaultSymbols(); collectImplicitSymbols(); collectPreDeterminedSymbols(); - privatize(clauseOps); + privatize(clauseOps, dir); insertBarrier(clauseOps); } @@ -422,47 +424,10 @@ static parser::CharBlock getSource(const semantics::SemanticsContext &semaCtx, }); } -static void collectPrivatizingConstructs( - llvm::SmallSet<llvm::omp::Directive, 16> &constructs, unsigned version) { - using Clause = llvm::omp::Clause; - using Directive = llvm::omp::Directive; - - static const Clause privatizingClauses[] = { - Clause::OMPC_private, - Clause::OMPC_lastprivate, - Clause::OMPC_firstprivate, - Clause::OMPC_in_reduction, - Clause::OMPC_reduction, - Clause::OMPC_linear, - // TODO: Clause::OMPC_induction, - Clause::OMPC_task_reduction, - Clause::OMPC_detach, - Clause::OMPC_use_device_ptr, - Clause::OMPC_is_device_ptr, - }; - - for (auto dir : llvm::enum_seq_inclusive<Directive>(Directive::First_, - Directive::Last_)) { - bool allowsPrivatizing = llvm::any_of(privatizingClauses, [&](Clause cls) { - return llvm::omp::isAllowedClauseForDirective(dir, cls, version); - }); - if (allowsPrivatizing) - constructs.insert(dir); - } -} - bool DataSharingProcessor::isOpenMPPrivatizingConstruct( const parser::OpenMPConstruct &omp, unsigned version) { - static llvm::SmallSet<llvm::omp::Directive, 16> privatizing; - [[maybe_unused]] static bool init = - (collectPrivatizingConstructs(privatizing, version), true); - - // As of OpenMP 6.0, privatizing constructs (with the test being if they - // allow a privatizing clause) are: dispatch, distribute, do, for, loop, - // parallel, scope, sections, simd, single, target, target_data, task, - // taskgroup, taskloop, and teams. - return llvm::is_contained(privatizing, - parser::omp::GetOmpDirectiveName(omp).v); + return llvm::omp::isPrivatizingConstruct( + parser::omp::GetOmpDirectiveName(omp).v, version); } bool DataSharingProcessor::isOpenMPPrivatizingEvaluation( @@ -617,14 +582,15 @@ void DataSharingProcessor::collectPreDeterminedSymbols() { preDeterminedSymbols); } -void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) { +void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir) { for (const semantics::Symbol *sym : allPrivatizedSymbols) { if (const auto *commonDet = sym->detailsIf<semantics::CommonBlockDetails>()) { for (const auto &mem : commonDet->objects()) - privatizeSymbol(&*mem, clauseOps); + privatizeSymbol(&*mem, clauseOps, dir); } else - privatizeSymbol(sym, clauseOps); + privatizeSymbol(sym, clauseOps, dir); } } @@ -643,7 +609,8 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { void DataSharingProcessor::privatizeSymbol( const semantics::Symbol *symToPrivatize, - mlir::omp::PrivateClauseOps *clauseOps) { + mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir) { if (!useDelayedPrivatization) { cloneSymbol(symToPrivatize); copyFirstPrivateSymbol(symToPrivatize); @@ -653,7 +620,7 @@ void DataSharingProcessor::privatizeSymbol( Fortran::lower::privatizeSymbol<mlir::omp::PrivateClauseOp, mlir::omp::PrivateClauseOps>( converter, firOpBuilder, symTable, allPrivatizedSymbols, - mightHaveReadHostSym, symToPrivatize, clauseOps); + mightHaveReadHostSym, symToPrivatize, clauseOps, dir); } } // namespace omp } // namespace lower diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 00b2d95bab22..f6aa8652e353 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -126,7 +126,8 @@ private: void collectDefaultSymbols(); void collectImplicitSymbols(); void collectPreDeterminedSymbols(); - void privatize(mlir::omp::PrivateClauseOps *clauseOps); + void privatize(mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir = std::nullopt); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const semantics::Symbol *sym); @@ -167,7 +168,8 @@ public: // Step2 performs the copying for lastprivates and requires knowledge of the // MLIR operation to insert the last private update. Step2 adds // dealocation code as well. - void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr); + void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr, + std::optional<llvm::omp::Directive> dir = std::nullopt); void processStep2(mlir::Operation *op, bool isLoop); void pushLoopIV(mlir::Value iv) { loopIVs.push_back(iv); } @@ -184,7 +186,8 @@ public: } void privatizeSymbol(const semantics::Symbol *symToPrivatize, - mlir::omp::PrivateClauseOps *clauseOps); + mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir = std::nullopt); }; } // namespace omp diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ec2ec37e623f..0ec33e6b24db 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,6 +38,7 @@ #include "flang/Semantics/tools.h" #include "flang/Support/Flags.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Support/StateStack.h" @@ -47,6 +48,7 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; +using namespace Fortran::utils::openmp; //===----------------------------------------------------------------------===// // Code generation helper functions @@ -407,7 +409,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ - [&](const parser::OpenMPBlockConstruct &ompConstruct) { + [&](const parser::OmpBlockConstruct &ompConstruct) { beginClauseList = &ompConstruct.BeginDir().Clauses(); if (auto &endSpec = ompConstruct.EndDir()) endClauseList = &endSpec->Clauses(); @@ -501,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -520,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -531,7 +533,14 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + break; + + case OMPD_teams_workdistribute: + cp.processThreadLimit(stmtCtx, hostInfo->ops); + [[fallthrough]]; + case OMPD_target_teams_workdistribute: + cp.processNumTeams(stmtCtx, hostInfo->ops); break; // Standalone 'target' case. @@ -679,7 +688,7 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter, } symThreadprivateValue = lower::genCommonBlockMember( converter, currentLocation, sym->GetUltimate(), - commonThreadprivateValue); + commonThreadprivateValue, common->size()); } else { symThreadprivateValue = genThreadprivateOp(*sym); } @@ -1392,7 +1401,7 @@ static void genIntermediateCommonBlockAccessors( for (auto obj : details->objects()) { auto targetCBMemberBind = Fortran::lower::genCommonBlockMember( - converter, currentLocation, *obj, mapArg); + converter, currentLocation, *obj, mapArg, mapSym->size()); fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj); fir::ExtendedValue targetCBExv = getExtendedValue(sexv, targetCBMemberBind); @@ -1413,7 +1422,7 @@ static void genBodyOfTargetOp( auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); mlir::Region ®ion = targetOp.getRegion(); - mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region); + genEntryBlock(firOpBuilder, args, region); bindEntryBlockArgs(converter, targetOp, args); if (HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter)) hostEvalInfo->bindOperands(argIface.getHostEvalBlockArgs()); @@ -1422,104 +1431,7 @@ static void genBodyOfTargetOp( // If so, then either clone them as well if they are MemoryEffectFree, or else // copy them to a new temporary and add them to the map and block_argument // lists and replace their uses with the new temporary. - llvm::SetVector<mlir::Value> valuesDefinedAbove; - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - while (!valuesDefinedAbove.empty()) { - for (mlir::Value val : valuesDefinedAbove) { - mlir::Operation *valOp = val.getDefiningOp(); - - // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the - // indices separately, as the alternative is to eventually map the Box, - // which comes with a fairly large overhead comparatively. We could be - // more robust about this and check using a BackwardsSlice to see if we - // run the risk of mapping a box. - if (valOp && mlir::isMemoryEffectFree(valOp) && - !mlir::isa<fir::BoxDimsOp>(valOp)) { - mlir::Operation *clonedOp = valOp->clone(); - entryBlock->push_front(clonedOp); - - auto replace = [entryBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == entryBlock; - }; - - valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace); - valOp->replaceUsesWithIf(clonedOp, replace); - } else { - auto savedIP = firOpBuilder.getInsertionPoint(); - - if (valOp) - firOpBuilder.setInsertionPointAfter(valOp); - else - // This means val is a block argument - firOpBuilder.setInsertionPoint(targetOp); - - auto copyVal = - firOpBuilder.createTemporary(val.getLoc(), val.getType()); - firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - - fir::factory::AddrAndBoundsInfo info = - fir::factory::getDataOperandBaseAddr( - firOpBuilder, val, /*isOptional=*/false, val.getLoc()); - llvm::SmallVector<mlir::Value> bounds = - fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, - mlir::omp::MapBoundsType>( - firOpBuilder, info, - hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder, - hlfir::Entity{val}) - .first, - /*dataExvIsAssumedSize=*/false, val.getLoc()); - - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - - llvm::omp::OpenMPOffloadMappingFlags mapFlag = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; - mlir::omp::VariableCaptureKind captureKind = - mlir::omp::VariableCaptureKind::ByRef; - - mlir::Type eleType = copyVal.getType(); - if (auto refType = - mlir::dyn_cast<fir::ReferenceType>(copyVal.getType())) - eleType = refType.getElementType(); - - if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { - captureKind = mlir::omp::VariableCaptureKind::ByCopy; - } else if (!fir::isa_builtin_cptr_type(eleType)) { - mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; - } - - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, - /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, - /*members=*/llvm::SmallVector<mlir::Value>{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( - mapFlag), - captureKind, copyVal.getType()); - - // Get the index of the first non-map argument before modifying mapVars, - // then append an element to mapVars and an associated entry block - // argument at that index. - unsigned insertIndex = - argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs(); - targetOp.getMapVarsMutable().append(mapOp); - mlir::Value clonedValArg = region.insertArgument( - insertIndex, copyVal.getType(), copyVal.getLoc()); - - firOpBuilder.setInsertionPointToStart(entryBlock); - auto loadOp = fir::LoadOp::create(firOpBuilder, clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf(loadOp->getResult(0), - [entryBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == entryBlock; - }); - firOpBuilder.setInsertionPoint(entryBlock, savedIP); - } - } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - } + cloneOrMapRegionOutsiders(firOpBuilder, targetOp); // Insert dummy instruction to remember the insertion position. The // marker will be deleted since there are not uses. @@ -1657,9 +1569,10 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -2036,9 +1949,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector<const semantics::Symbol *>(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody<mlir::omp::LoopNestOp>( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2820,6 +2733,17 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::omp::WorkdistributeOp genWorkdistributeOp( + lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + return genOpWithBody<mlir::omp::WorkdistributeOp>( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, + llvm::omp::Directive::OMPD_workdistribute), + queue, item); +} + //===----------------------------------------------------------------------===// // Code generation functions for the standalone version of constructs that can // also be a leaf of a composite construct @@ -3237,7 +3161,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/true, symTable); - simdItemDSP.processStep1(&simdClauseOps); + simdItemDSP.processStep1(&simdClauseOps, simdItem->id); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. @@ -3459,7 +3383,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - // case llvm::omp::Directive::OMPD_workdistribute: + case llvm::omp::Directive::OMPD_workdistribute: + newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, + item); + break; case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); @@ -3766,7 +3693,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - const parser::OpenMPBlockConstruct &blockConstruct) { + const parser::OmpBlockConstruct &blockConstruct) { const parser::OmpDirectiveSpecification &beginSpec = blockConstruct.BeginDir(); List<Clause> clauses = makeClauses(beginSpec.Clauses(), semaCtx); @@ -3917,8 +3844,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; @@ -3958,9 +3885,12 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List<Clause> clauses = makeClauses( std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx); const auto &endSectionsDirective = - std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t); + std::get<std::optional<parser::OmpEndSectionsDirective>>( + sectionsConstruct.t); + assert(endSectionsDirective && + "Missing end section directive should have been handled in semantics"); clauses.append(makeClauses( - std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx)); + std::get<parser::OmpClauseList>(endSectionsDirective->t), semaCtx)); mlir::Location currentLocation = converter.getCurrentLocation(); llvm::omp::Directive directive = @@ -4028,18 +3958,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if<parser::OmpClause::Collapse>(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -4060,8 +3978,9 @@ void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, firOpBuilder, currentLocation, commonValue.getType(), commonValue); converter.bindSymbol(*common, commonThreadprivateValue); // Generate the threadprivate value for the common block member. - symThreadprivateValue = genCommonBlockMember(converter, currentLocation, - sym, commonThreadprivateValue); + symThreadprivateValue = + genCommonBlockMember(converter, currentLocation, sym, + commonThreadprivateValue, common->size()); } else if (!var.isGlobal()) { // Non-global variable which can be in threadprivate directive must be one // variable in main program, and it has implicit SAVE attribute. Take it as @@ -4124,7 +4043,7 @@ void Fortran::lower::genDeclareTargetIntGlobal( bool Fortran::lower::isOpenMPTargetConstruct( const parser::OpenMPConstruct &omp) { llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; - if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) { + if (const auto *block = std::get_if<parser::OmpBlockConstruct>(&omp.u)) { dir = block->BeginDir().DirId(); } else if (const auto *loop = std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 77b1e39083aa..d1d1cd68a5b4 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,6 +13,7 @@ #include "Utils.h" #include "ClauseFinder.h" +#include "flang/Evaluate/fold.h" #include "flang/Lower/OpenMP/Clauses.h" #include <flang/Lower/AbstractConverter.h> #include <flang/Lower/ConvertType.h> @@ -24,10 +25,32 @@ #include <flang/Parser/parse-tree.h> #include <flang/Parser/tools.h> #include <flang/Semantics/tools.h> +#include <flang/Semantics/type.h> +#include <flang/Utils/OpenMP.h> #include <llvm/Support/CommandLine.h> #include <iterator> +template <typename T> +Fortran::semantics::MaybeIntExpr +EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { + if (Fortran::semantics::MaybeExpr maybeExpr{ + Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { + if (auto *intExpr{ + Fortran::evaluate::UnwrapExpr<Fortran::semantics::SomeIntExpr>( + *maybeExpr)}) { + return std::move(*intExpr); + } + } + return std::nullopt; +} + +template <typename T> +std::optional<std::int64_t> +EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { + return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); +} + llvm::cl::opt<bool> treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -108,38 +131,6 @@ void gatherFuncAndVarSyms( symbolAndClause.emplace_back(clause, *object.sym(), automap); } -mlir::omp::MapInfoOp -createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value baseAddr, mlir::Value varPtrPtr, - llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds, - llvm::ArrayRef<mlir::Value> members, - mlir::ArrayAttr membersIndex, uint64_t mapType, - mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, - bool partialMap, mlir::FlatSymbolRefAttr mapperId) { - if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(baseAddr.getType())) { - baseAddr = fir::BoxAddrOp::create(builder, loc, baseAddr); - retTy = baseAddr.getType(); - } - - mlir::TypeAttr varType = mlir::TypeAttr::get( - llvm::cast<mlir::omp::PointerLikeType>(retTy).getElementType()); - - // For types with unknown extents such as <2x?xi32> we discard the incomplete - // type info and only retain the base type. The correct dimensions are later - // recovered through the bounds info. - if (auto seqType = llvm::dyn_cast<fir::SequenceType>(varType.getValue())) - if (seqType.hasDynamicExtents()) - varType = mlir::TypeAttr::get(seqType.getEleTy()); - - mlir::omp::MapInfoOp op = mlir::omp::MapInfoOp::create( - builder, loc, retTy, baseAddr, varType, - builder.getIntegerAttr(builder.getIntegerType(64, false), mapType), - builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType), - varPtrPtr, members, membersIndex, bounds, mapperId, - builder.getStringAttr(name), builder.getBoolAttr(partialMap)); - return op; -} - // This function gathers the individual omp::Object's that make up a // larger omp::Object symbol. // @@ -403,7 +394,7 @@ mlir::Value createParentSymAndGenIntermediateMaps( // Create a map for the intermediate member and insert it and it's // indices into the parentMemberIndices list to track it. - mlir::omp::MapInfoOp mapOp = createMapInfoOp( + mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, clauseLocation, curValue, /*varPtrPtr=*/mlir::Value{}, asFortran, /*bounds=*/interimBounds, @@ -563,7 +554,7 @@ void insertChildMapInfoIntoParent( converter.getCurrentLocation(), asFortran, bounds, treatIndexAsSection); - mlir::omp::MapInfoOp mapOp = createMapInfoOp( + mlir::omp::MapInfoOp mapOp = utils::openmp::createMapInfoOp( firOpBuilder, info.rawInput.getLoc(), info.rawInput, /*varPtrPtr=*/mlir::Value(), asFortran.str(), bounds, members, firOpBuilder.create2DI64ArrayAttr( @@ -608,12 +599,64 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -bool collectLoopRelatedInfo( +// Helper function that finds the sizes clause in a inner OMPD_tile directive +// and passes the sizes clause to the callback function if found. +static void processTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + std::function<void(const parser::OmpClause::Sizes *)> processFun) { + if (!ompCons) + return; + if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) { + const auto &nestedOptional = + std::get<std::optional<parser::NestedConstruct>>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t); + const auto &innerDirective = + std::get<parser::OmpLoopDirective>(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector. + const auto &innerClauseList{ + std::get<parser::OmpClauseList>(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if<parser::OmpClause::Sizes>(&clause.u)}) { + processFun(tclause); + break; + } + } + } + } + } +} + +/// Populates the sizes vector with values if the given OpenMPConstruct +/// contains a loop construct with an inner tiling construct. +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl<int64_t> &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + for (auto &tval : tclause->v) + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + }); +} + +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv) { - bool found = false; + int64_t numCollapse = 1; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -626,9 +669,19 @@ bool collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause<omp::clause::Collapse>(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - found = true; + numCollapse = collapseValue; + } + + // Collect sizes from tile directive if present. + std::int64_t sizesLengthValue = 0l; + if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); } + collapseValue = std::max(collapseValue, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -662,7 +715,7 @@ bool collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; + return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 60f44a7f0610..5f191d89ae20 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -114,16 +114,6 @@ struct OmpMapParentAndMemberData { semantics::SemanticsContext &semaCtx); }; -mlir::omp::MapInfoOp -createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value baseAddr, mlir::Value varPtrPtr, - llvm::StringRef name, llvm::ArrayRef<mlir::Value> bounds, - llvm::ArrayRef<mlir::Value> members, - mlir::ArrayAttr membersIndex, uint64_t mapType, - mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, - bool partialMap = false, - mlir::FlatSymbolRefAttr mapperId = mlir::FlatSymbolRefAttr()); - void insertChildMapInfoIntoParent( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, @@ -169,12 +159,17 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl<const semantics::Symbol *> &iv); +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl<int64_t> &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp index fc59a2414d53..494dd49e961b 100644 --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -39,8 +39,7 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) { if (parentOp->getDialect()->getNamespace() == mlir::omp::OpenMPDialect::getDialectNamespace()) Fortran::lower::genOpenMPTerminator(builder, parentOp, loc); - else if (parentOp->getDialect()->getNamespace() == - mlir::acc::OpenACCDialect::getDialectNamespace()) + else if (Fortran::lower::isInsideOpenACCComputeConstruct(builder)) Fortran::lower::genOpenACCTerminator(builder, parentOp, loc); else fir::UnreachableOp::create(builder, loc); diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp index 0cdb03beb72a..1b4d37e9798a 100644 --- a/flang/lib/Lower/Support/Utils.cpp +++ b/flang/lib/Lower/Support/Utils.cpp @@ -655,7 +655,8 @@ void privatizeSymbol( lower::SymMap &symTable, llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols, llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym, - const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps) { + const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps, + std::optional<llvm::omp::Directive> dir) { constexpr bool isDoConcurrent = std::is_same_v<OpType, fir::LocalitySpecifierOp>; mlir::OpBuilder::InsertPoint dcIP; @@ -676,6 +677,13 @@ void privatizeSymbol( bool emitCopyRegion = symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) || symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit); + // A symbol attached to the simd directive can have the firstprivate flag set + // on it when it is also used in a non-firstprivate privatization clause. + // For instance: $omp do simd lastprivate(a) firstprivate(a) + // We cannot apply the firstprivate privatizer to simd, so make sure we do + // not emit the copy region when dealing with the SIMD directive. + if (dir && dir == llvm::omp::Directive::OMPD_simd) + emitCopyRegion = false; mlir::Value privVal = hsb.getAddr(); mlir::Type allocType = privVal.getType(); @@ -848,7 +856,8 @@ privatizeSymbol<mlir::omp::PrivateClauseOp, mlir::omp::PrivateClauseOps>( llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols, llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym, const semantics::Symbol *symToPrivatize, - mlir::omp::PrivateClauseOps *clauseOps); + mlir::omp::PrivateClauseOps *clauseOps, + std::optional<llvm::omp::Directive> dir); template void privatizeSymbol<fir::LocalitySpecifierOp, fir::LocalitySpecifierOperands>( @@ -857,6 +866,7 @@ privatizeSymbol<fir::LocalitySpecifierOp, fir::LocalitySpecifierOperands>( llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols, llvm::SmallPtrSet<const semantics::Symbol *, 16> &mightHaveReadHostSym, const semantics::Symbol *symToPrivatize, - fir::LocalitySpecifierOperands *clauseOps); + fir::LocalitySpecifierOperands *clauseOps, + std::optional<llvm::omp::Directive> dir); } // end namespace Fortran::lower diff --git a/flang/lib/Lower/SymbolMap.cpp b/flang/lib/Lower/SymbolMap.cpp index b929dfbd5aec..080f21ec6740 100644 --- a/flang/lib/Lower/SymbolMap.cpp +++ b/flang/lib/Lower/SymbolMap.cpp @@ -82,6 +82,23 @@ Fortran::lower::SymMap::lookupImpliedDo(Fortran::lower::SymMap::AcDoVar var) { return {}; } +void Fortran::lower::SymMap::registerStorage( + semantics::SymbolRef symRef, Fortran::lower::SymMap::StorageDesc storage) { + auto *sym = symRef->HasLocalLocality() ? &*symRef : &symRef->GetUltimate(); + assert(storage.first && "registerting storage without an address"); + storageMapStack.back().insert_or_assign(sym, std::move(storage)); +} + +Fortran::lower::SymMap::StorageDesc +Fortran::lower::SymMap::lookupStorage(Fortran::semantics::SymbolRef symRef) { + auto *sym = symRef->HasLocalLocality() ? &*symRef : &symRef->GetUltimate(); + auto &map = storageMapStack.back(); + auto iter = map.find(sym); + if (iter != map.end()) + return iter->second; + return {nullptr, 0}; +} + void Fortran::lower::SymbolBox::dump() const { llvm::errs() << *this << '\n'; } void Fortran::lower::SymMap::dump() const { llvm::errs() << *this << '\n'; } |
