summary | refs | log | tree | commit | diff
path: root/flang/lib/Optimizer/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'flang/lib/Optimizer/CodeGen')
-rw-r--r-- flang/lib/Optimizer/CodeGen/CMakeLists.txt | 1
-rw-r--r-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 27
-rw-r--r-- flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp | 58
-rw-r--r-- flang/lib/Optimizer/CodeGen/Target.cpp | 25
4 files changed, 82 insertions, 29 deletions
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index 980307db315d..16c7944a885a 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -34,6 +34,7 @@ add_flang_library(FIRCodeGen
MLIR_LIBS
MLIRComplexToLLVM
+ MLIRComplexToROCDLLibraryCalls
MLIRComplexToStandard
MLIRGPUDialect
MLIRMathToFuncs
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index ecc04a6c9a2b..d879382555c3 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -33,6 +33,7 @@
#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h"
+#include "mlir/Conversion/ComplexToROCDLLibraryCalls/ComplexToROCDLLibraryCalls.h"
#include "mlir/Conversion/ComplexToStandard/ComplexToStandard.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
@@ -1122,6 +1123,16 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
for (mlir::Value opnd : adaptor.getOperands())
size = rewriter.create<mlir::LLVM::MulOp>(
loc, ity, size, integerCast(loc, rewriter, ity, opnd));
+
+ // The return value of malloc(0) is implementation-defined, so allocate at
+ // least one byte to ensure the allocation status is true. This matches
+ // the behavior of the runtime.
+ mlir::Value zero = genConstantIndex(loc, ity, rewriter, 0);
+ mlir::Value one = genConstantIndex(loc, ity, rewriter, 1);
+ mlir::Value cmp = rewriter.create<mlir::LLVM::ICmpOp>(
+ loc, mlir::LLVM::ICmpPredicate::sgt, size, zero);
+ size = rewriter.create<mlir::LLVM::SelectOp>(loc, cmp, size, one);
+
auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
auto mallocTy =
mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
@@ -4145,22 +4156,24 @@ public:
// conversions that affect the ModuleOp, e.g. create new
// function operations in it. We have to run such conversions
// as passes here.
- mlir::OpPassManager mathConvertionPM("builtin.module");
+ mlir::OpPassManager mathConversionPM("builtin.module");
bool isAMDGCN = fir::getTargetTriple(mod).isAMDGCN();
// If compiling for AMD target some math operations must be lowered to AMD
// GPU library calls, the rest can be converted to LLVM intrinsics, which
// is handled in the mathToLLVM conversion. The lowering to libm calls is
// not needed since all math operations are handled this way.
- if (isAMDGCN)
- mathConvertionPM.addPass(mlir::createConvertMathToROCDL());
+ if (isAMDGCN) {
+ mathConversionPM.addPass(mlir::createConvertMathToROCDL());
+ mathConversionPM.addPass(mlir::createConvertComplexToROCDLLibraryCalls());
+ }
// Convert math::FPowI operations to inline implementation
// only if the exponent's width is greater than 32, otherwise,
// it will be lowered to LLVM intrinsic operation by a later conversion.
mlir::ConvertMathToFuncsOptions mathToFuncsOptions{};
mathToFuncsOptions.minWidthOfFPowIExponent = 33;
- mathConvertionPM.addPass(
+ mathConversionPM.addPass(
mlir::createConvertMathToFuncs(mathToFuncsOptions));
mlir::ConvertComplexToStandardPassOptions complexToStandardOptions{};
@@ -4173,15 +4186,15 @@ public:
complexToStandardOptions.complexRange =
mlir::complex::ComplexRangeFlags::improved;
}
- mathConvertionPM.addPass(
+ mathConversionPM.addPass(
mlir::createConvertComplexToStandardPass(complexToStandardOptions));
// Convert Math dialect operations into LLVM dialect operations.
// There is no way to prefer MathToLLVM patterns over MathToLibm
// patterns (applied below), so we have to run MathToLLVM conversion here.
- mathConvertionPM.addNestedPass<mlir::func::FuncOp>(
+ mathConversionPM.addNestedPass<mlir::func::FuncOp>(
mlir::createConvertMathToLLVMPass());
- if (mlir::failed(runPipeline(mathConvertionPM, mod)))
+ if (mlir::failed(runPipeline(mathConversionPM, mod)))
return signalPassFailure();
std::optional<mlir::DataLayout> dl =
diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
index 2774382c22bf..d2cf85bedd54 100644
--- a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
+++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
@@ -38,7 +38,7 @@
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Optimizer/OpenACC/RegisterOpenACCExtensions.h"
+#include "flang/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.h"
#include "flang/Optimizer/OpenMP/Support/RegisterOpenMPExtensions.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -63,13 +63,14 @@ private:
static constexpr llvm::StringRef bufferName = ".repacked";
// Return value of fir::BaseBoxType that represents a temporary
- // array created for the original box with given extents and
- // type parameters. The new box has the default lower bounds.
- // If useStack is true, then the temporary will be allocated
+ // array created for the original box with given lbounds/extents and
+ // type parameters. The new box has the same shape as the original
+ // array. If useStack is true, then the temporary will be allocated
// in stack memory (when possible).
static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder,
mlir::Location loc, bool useStack,
mlir::Value origBox,
+ llvm::ArrayRef<mlir::Value> lbounds,
llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> typeParams);
@@ -99,7 +100,9 @@ public:
// the presence of the stack attribute does not automatically
// mean that the allocation is actually done in stack memory.
// For example, we always do the heap allocation for polymorphic
-// types using Fortran runtime.
+// types using Fortran runtime. Currently, we allocate all
+// repack temporaries of derived types as polymorphic,
+// so that we can preserve the dynamic type of the original.
// Adding the polymorphic mold to fir.alloca and then using
// Fortran runtime to compute the allocation size could probably
// resolve this limitation.
@@ -170,7 +173,8 @@ PackArrayConversion::matchAndRewrite(fir::PackArrayOp op,
mlir::Value PackArrayConversion::allocateTempBuffer(
fir::FirOpBuilder &builder, mlir::Location loc, bool useStack,
- mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents,
+ mlir::Value origBox, llvm::ArrayRef<mlir::Value> lbounds,
+ llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> typeParams) {
auto tempType = mlir::cast<fir::SequenceType>(
fir::extractSequenceType(origBox.getType()));
@@ -191,16 +195,35 @@ mlir::Value PackArrayConversion::allocateTempBuffer(
assert(!isHeapAllocation && "temp must have been allocated on the stack");
mlir::Type ptrType = base.getType();
- if (llvm::isa<fir::BaseBoxType>(ptrType))
- return base;
+ if (auto tempBoxType = mlir::dyn_cast<fir::BaseBoxType>(ptrType)) {
+ // We need to reset the CFI_attribute_allocatable before
+ // returning the temporary box to avoid any mishandling
+ // of the temporary box in Fortran runtime.
+ base = builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(tempBoxType),
+ base);
+ ptrType = base.getType();
+ }
- mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType)
- ? ptrType
- : fir::unwrapRefType(ptrType));
+ // Create the temporary using dynamic type of the original,
+ // if it is polymorphic, or it has a derived type with SEQUENCE
+ // or BIND attribute (such dummy arguments may have their dynamic
+ // type not exactly matching their static type).
+ // Note that for the latter case, the allocation can still be done
+ // without the mold, because the dynamic and static types
+ // must be storage compatible.
+ bool useDynamicType = fir::isBoxedRecordType(origBox.getType()) ||
+ fir::isPolymorphicType(origBox.getType());
+ mlir::Type tempBoxType =
+ fir::wrapInClassOrBoxType(fir::unwrapRefType(ptrType),
+ /*isPolymorphic=*/useDynamicType);
+ // Use the shape with proper lower bounds for the final box.
+ shape = builder.genShape(loc, lbounds, extents);
mlir::Value newBox =
builder.createBox(loc, tempBoxType, base, shape, /*slice=*/nullptr,
- typeParams, /*tdesc=*/nullptr);
- return newBox;
+ typeParams, useDynamicType ? origBox : nullptr);
+ // The new box might be !fir.class, while the original might be
+ // !fir.box - we have to add a conversion.
+ return builder.createConvert(loc, origBox.getType(), newBox);
}
mlir::FailureOr<mlir::Value>
@@ -280,16 +303,11 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder,
<< op.getOperation() << '\n';
}
- mlir::Value tempBox =
- allocateTempBuffer(builder, loc, op.getStack(), box, extents, typeParams);
+ mlir::Value tempBox = allocateTempBuffer(builder, loc, op.getStack(), box,
+ lbounds, extents, typeParams);
if (!op.getNoCopy())
fir::runtime::genShallowCopy(builder, loc, tempBox, box,
/*resultIsAllocated=*/true);
-
- // Set lower bounds after the original box.
- mlir::Value shift = builder.genShift(loc, lbounds);
- tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shift,
- /*slice=*/nullptr);
builder.create<fir::ResultOp>(loc, tempBox);
return ifOp.getResult(0);
diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 7dbf21ce0c12..b60a72e4340b 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -1443,14 +1443,35 @@ struct TargetAMDGPU : public GenericTarget<TargetAMDGPU> {
CodeGenSpecifics::Marshalling
complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
CodeGenSpecifics::Marshalling marshal;
- TODO(loc, "handle complex argument types");
+ const auto *sem = &floatToSemantics(kindMap, eleTy);
+ if (sem == &llvm::APFloat::IEEEsingle()) {
+ // Lower COMPLEX(KIND=4) as an array of two element values.
+ marshal.emplace_back(fir::SequenceType::get({2}, eleTy), AT{});
+ } else if (sem == &llvm::APFloat::IEEEdouble()) {
+ // Pass COMPLEX(KIND=8) as two separate arguments.
+ marshal.emplace_back(eleTy, AT{});
+ marshal.emplace_back(eleTy, AT{});
+ } else {
+ typeTodo(sem, loc, "argument");
+ }
return marshal;
}
CodeGenSpecifics::Marshalling
complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
CodeGenSpecifics::Marshalling marshal;
- TODO(loc, "handle complex return types");
+ const auto *sem = &floatToSemantics(kindMap, eleTy);
+ if (sem == &llvm::APFloat::IEEEsingle()) {
+ // Return COMPLEX(KIND=4) as an array of two elements.
+ marshal.emplace_back(fir::SequenceType::get({2}, eleTy), AT{});
+ } else if (sem == &llvm::APFloat::IEEEdouble()) {
+ // Return COMPLEX(KIND=8) via an aggregate with two fields.
+ marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
+ mlir::TypeRange{eleTy, eleTy}),
+ AT{});
+ } else {
+ typeTodo(sem, loc, "return");
+ }
return marshal;
}
};