diff options
Diffstat (limited to 'flang/lib/Optimizer/CodeGen')
| -rw-r--r-- | flang/lib/Optimizer/CodeGen/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | flang/lib/Optimizer/CodeGen/CodeGen.cpp | 27 | ||||
| -rw-r--r-- | flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp | 58 | ||||
| -rw-r--r-- | flang/lib/Optimizer/CodeGen/Target.cpp | 25 |
4 files changed, 82 insertions, 29 deletions
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt index 980307db315d..16c7944a885a 100644 --- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -34,6 +34,7 @@ add_flang_library(FIRCodeGen MLIR_LIBS MLIRComplexToLLVM + MLIRComplexToROCDLLibraryCalls MLIRComplexToStandard MLIRGPUDialect MLIRMathToFuncs diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ecc04a6c9a2b..d879382555c3 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -33,6 +33,7 @@ #include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" +#include "mlir/Conversion/ComplexToROCDLLibraryCalls/ComplexToROCDLLibraryCalls.h" #include "mlir/Conversion/ComplexToStandard/ComplexToStandard.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" @@ -1122,6 +1123,16 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> { for (mlir::Value opnd : adaptor.getOperands()) size = rewriter.create<mlir::LLVM::MulOp>( loc, ity, size, integerCast(loc, rewriter, ity, opnd)); + + // As the return value of malloc(0) is implementation defined, allocate one + // byte to ensure the allocation status being true. This behavior aligns to + // what the runtime has. + mlir::Value zero = genConstantIndex(loc, ity, rewriter, 0); + mlir::Value one = genConstantIndex(loc, ity, rewriter, 1); + mlir::Value cmp = rewriter.create<mlir::LLVM::ICmpOp>( + loc, mlir::LLVM::ICmpPredicate::sgt, size, zero); + size = rewriter.create<mlir::LLVM::SelectOp>(loc, cmp, size, one); + auto mallocTyWidth = lowerTy().getIndexTypeBitwidth(); auto mallocTy = mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth); @@ -4145,22 +4156,24 @@ public: // conversions that affect the ModuleOp, e.g. create new // function operations in it. We have to run such conversions // as passes here. - mlir::OpPassManager mathConvertionPM("builtin.module"); + mlir::OpPassManager mathConversionPM("builtin.module"); bool isAMDGCN = fir::getTargetTriple(mod).isAMDGCN(); // If compiling for AMD target some math operations must be lowered to AMD // GPU library calls, the rest can be converted to LLVM intrinsics, which // is handled in the mathToLLVM conversion. The lowering to libm calls is // not needed since all math operations are handled this way. - if (isAMDGCN) - mathConvertionPM.addPass(mlir::createConvertMathToROCDL()); + if (isAMDGCN) { + mathConversionPM.addPass(mlir::createConvertMathToROCDL()); + mathConversionPM.addPass(mlir::createConvertComplexToROCDLLibraryCalls()); + } // Convert math::FPowI operations to inline implementation // only if the exponent's width is greater than 32, otherwise, // it will be lowered to LLVM intrinsic operation by a later conversion. mlir::ConvertMathToFuncsOptions mathToFuncsOptions{}; mathToFuncsOptions.minWidthOfFPowIExponent = 33; - mathConvertionPM.addPass( + mathConversionPM.addPass( mlir::createConvertMathToFuncs(mathToFuncsOptions)); mlir::ConvertComplexToStandardPassOptions complexToStandardOptions{}; @@ -4173,15 +4186,15 @@ public: complexToStandardOptions.complexRange = mlir::complex::ComplexRangeFlags::improved; } - mathConvertionPM.addPass( + mathConversionPM.addPass( mlir::createConvertComplexToStandardPass(complexToStandardOptions)); // Convert Math dialect operations into LLVM dialect operations. // There is no way to prefer MathToLLVM patterns over MathToLibm // patterns (applied below), so we have to run MathToLLVM conversion here. - mathConvertionPM.addNestedPass<mlir::func::FuncOp>( + mathConversionPM.addNestedPass<mlir::func::FuncOp>( mlir::createConvertMathToLLVMPass()); - if (mlir::failed(runPipeline(mathConvertionPM, mod))) + if (mlir::failed(runPipeline(mathConversionPM, mod))) return signalPassFailure(); std::optional<mlir::DataLayout> dl = diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp index 2774382c22bf..d2cf85bedd54 100644 --- a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp +++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp @@ -38,7 +38,7 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" -#include "flang/Optimizer/OpenACC/RegisterOpenACCExtensions.h" +#include "flang/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.h" #include "flang/Optimizer/OpenMP/Support/RegisterOpenMPExtensions.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -63,13 +63,14 @@ private: static constexpr llvm::StringRef bufferName = ".repacked"; // Return value of fir::BaseBoxType that represents a temporary - // array created for the original box with given extents and - // type parameters. The new box has the default lower bounds. - // If useStack is true, then the temporary will be allocated + // array created for the original box with given lbounds/extents and + // type parameters. The new box has the same shape as the original + // array. If useStack is true, then the temporary will be allocated // in stack memory (when possible). static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder, mlir::Location loc, bool useStack, mlir::Value origBox, + llvm::ArrayRef<mlir::Value> lbounds, llvm::ArrayRef<mlir::Value> extents, llvm::ArrayRef<mlir::Value> typeParams); @@ -99,7 +100,9 @@ public: // the presence of the stack attribute does not automatically // mean that the allocation is actually done in stack memory. // For example, we always do the heap allocation for polymorphic -// types using Fortran runtime. +// types using Fortran runtime. Currently, we allocate all +// repack temporaries of derived types as polymorphic, +// so that we can preserve the dynamic type of the original. // Adding the polymorpic mold to fir.alloca and then using // Fortran runtime to compute the allocation size could probably // resolve this limitation. @@ -170,7 +173,8 @@ PackArrayConversion::matchAndRewrite(fir::PackArrayOp op, mlir::Value PackArrayConversion::allocateTempBuffer( fir::FirOpBuilder &builder, mlir::Location loc, bool useStack, - mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents, + mlir::Value origBox, llvm::ArrayRef<mlir::Value> lbounds, + llvm::ArrayRef<mlir::Value> extents, llvm::ArrayRef<mlir::Value> typeParams) { auto tempType = mlir::cast<fir::SequenceType>( fir::extractSequenceType(origBox.getType())); @@ -191,16 +195,35 @@ mlir::Value PackArrayConversion::allocateTempBuffer( assert(!isHeapAllocation && "temp must have been allocated on the stack"); mlir::Type ptrType = base.getType(); - if (llvm::isa<fir::BaseBoxType>(ptrType)) - return base; + if (auto tempBoxType = mlir::dyn_cast<fir::BaseBoxType>(ptrType)) { + // We need to reset the CFI_attribute_allocatable before + // returning the temporary box to avoid any mishandling + // of the temporary box in Fortran runtime. + base = builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(tempBoxType), + base); + ptrType = base.getType(); + } - mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType) - ? ptrType - : fir::unwrapRefType(ptrType)); + // Create the temporary using dynamic type of the original, + // if it is polymorphic, or it has a derived type with SEQUENCE + // or BIND attribute (such dummy arguments may have their dynamic + // type not exactly matching their static type). + // Note that for the latter case, the allocation can still be done + // without the mold, because the dynamic and static types + // must be storage compatible. + bool useDynamicType = fir::isBoxedRecordType(origBox.getType()) || + fir::isPolymorphicType(origBox.getType()); + mlir::Type tempBoxType = + fir::wrapInClassOrBoxType(fir::unwrapRefType(ptrType), + /*isPolymorphic=*/useDynamicType); + // Use the shape with proper lower bounds for the final box. + shape = builder.genShape(loc, lbounds, extents); mlir::Value newBox = builder.createBox(loc, tempBoxType, base, shape, /*slice=*/nullptr, - typeParams, /*tdesc=*/nullptr); - return newBox; + typeParams, useDynamicType ? origBox : nullptr); + // The new box might be !fir.class, while the original might be + // !fir.box - we have to add a conversion. + return builder.createConvert(loc, origBox.getType(), newBox); } mlir::FailureOr<mlir::Value> @@ -280,16 +303,11 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder, << op.getOperation() << '\n'; } - mlir::Value tempBox = - allocateTempBuffer(builder, loc, op.getStack(), box, extents, typeParams); + mlir::Value tempBox = allocateTempBuffer(builder, loc, op.getStack(), box, + lbounds, extents, typeParams); if (!op.getNoCopy()) fir::runtime::genShallowCopy(builder, loc, tempBox, box, /*resultIsAllocated=*/true); - - // Set lower bounds after the original box. - mlir::Value shift = builder.genShift(loc, lbounds); - tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shift, - /*slice=*/nullptr); builder.create<fir::ResultOp>(loc, tempBox); return ifOp.getResult(0); diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp index 7dbf21ce0c12..b60a72e4340b 100644 --- a/flang/lib/Optimizer/CodeGen/Target.cpp +++ b/flang/lib/Optimizer/CodeGen/Target.cpp @@ -1443,14 +1443,35 @@ struct TargetAMDGPU : public GenericTarget<TargetAMDGPU> { CodeGenSpecifics::Marshalling complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override { CodeGenSpecifics::Marshalling marshal; - TODO(loc, "handle complex argument types"); + const auto *sem = &floatToSemantics(kindMap, eleTy); + if (sem == &llvm::APFloat::IEEEsingle()) { + // Lower COMPLEX(KIND=4) as an array of two element values. + marshal.emplace_back(fir::SequenceType::get({2}, eleTy), AT{}); + } else if (sem == &llvm::APFloat::IEEEdouble()) { + // Pass COMPLEX(KIND=8) as two separate arguments. + marshal.emplace_back(eleTy, AT{}); + marshal.emplace_back(eleTy, AT{}); + } else { + typeTodo(sem, loc, "argument"); + } return marshal; } CodeGenSpecifics::Marshalling complexReturnType(mlir::Location loc, mlir::Type eleTy) const override { CodeGenSpecifics::Marshalling marshal; - TODO(loc, "handle complex return types"); + const auto *sem = &floatToSemantics(kindMap, eleTy); + if (sem == &llvm::APFloat::IEEEsingle()) { + // Return COMPLEX(KIND=4) as an array of two elements. + marshal.emplace_back(fir::SequenceType::get({2}, eleTy), AT{}); + } else if (sem == &llvm::APFloat::IEEEdouble()) { + // Return COMPLEX(KIND=8) via an aggregate with two fields. + marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(), + mlir::TypeRange{eleTy, eleTy}), + AT{}); + } else { + typeTodo(sem, loc, "return"); + } return marshal; } }; |
