diff options
Diffstat (limited to 'flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp')
| -rw-r--r-- | flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp | 391 |
1 files changed, 262 insertions, 129 deletions
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 9327e7ad5875..bf3cf861e46f 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -159,28 +159,162 @@ containsReadOrWriteEffectOn(const mlir::MemoryEffects::EffectInstance &effect, return mlir::AliasResult::NoAlias; } -// Returns true if the given array references represent identical -// or completely disjoint array slices. The callers may use this -// method when the alias analysis reports an alias of some kind, -// so that we can run Fortran specific analysis on the array slices -// to see if they are identical or disjoint. Note that the alias -// analysis are not able to give such an answer about the references. -static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) { +// Helper class for analyzing two array slices represented +// by two hlfir.designate operations. +class ArraySectionAnalyzer { +public: + // The result of the analyzis is one of the values below. + enum class SlicesOverlapKind { + // Slices overlap is unknown. + Unknown, + // Slices are definitely identical. + DefinitelyIdentical, + // Slices are definitely disjoint. + DefinitelyDisjoint, + // Slices may be either disjoint or identical, + // i.e. there is definitely no partial overlap. + EitherIdenticalOrDisjoint + }; + + // Analyzes two hlfir.designate results and returns the overlap kind. + // The callers may use this method when the alias analysis reports + // an alias of some kind, so that we can run Fortran specific analysis + // on the array slices to see if they are identical or disjoint. + // Note that the alias analysis are not able to give such an answer + // about the references. + static SlicesOverlapKind analyze(mlir::Value ref1, mlir::Value ref2); + +private: + struct SectionDesc { + // An array section is described by <lb, ub, stride> tuple. + // If the designator's subscript is not a triple, then + // the section descriptor is constructed as <lb, nullptr, nullptr>. + mlir::Value lb, ub, stride; + + SectionDesc(mlir::Value lb, mlir::Value ub, mlir::Value stride) + : lb(lb), ub(ub), stride(stride) { + assert(lb && "lower bound or index must be specified"); + normalize(); + } + + // Normalize the section descriptor: + // 1. If UB is nullptr, then it is set to LB. + // 2. If LB==UB, then stride does not matter, + // so it is reset to nullptr. + // 3. If STRIDE==1, then it is reset to nullptr. + void normalize() { + if (!ub) + ub = lb; + if (lb == ub) + stride = nullptr; + if (stride) + if (auto val = fir::getIntIfConstant(stride)) + if (*val == 1) + stride = nullptr; + } + + bool operator==(const SectionDesc &other) const { + return lb == other.lb && ub == other.ub && stride == other.stride; + } + }; + + // Given an operand_iterator over the indices operands, + // read the subscript values and return them as SectionDesc + // updating the iterator. If isTriplet is true, + // the subscript is a triplet, and the result is <lb, ub, stride>. + // Otherwise, the subscript is a scalar index, and the result + // is <index, nullptr, nullptr>. + static SectionDesc readSectionDesc(mlir::Operation::operand_iterator &it, + bool isTriplet) { + if (isTriplet) + return {*it++, *it++, *it++}; + return {*it++, nullptr, nullptr}; + } + + // Return the ordered lower and upper bounds of the section. + // If stride is known to be non-negative, then the ordered + // bounds match the <lb, ub> of the descriptor. + // If stride is known to be negative, then the ordered + // bounds are <ub, lb> of the descriptor. + // If stride is unknown, we cannot deduce any order, + // so the result is <nullptr, nullptr> + static std::pair<mlir::Value, mlir::Value> + getOrderedBounds(const SectionDesc &desc) { + mlir::Value stride = desc.stride; + // Null stride means stride=1. + if (!stride) + return {desc.lb, desc.ub}; + // Reverse the bounds, if stride is negative. + if (auto val = fir::getIntIfConstant(stride)) { + if (*val >= 0) + return {desc.lb, desc.ub}; + else + return {desc.ub, desc.lb}; + } + + return {nullptr, nullptr}; + } + + // Given two array sections <lb1, ub1, stride1> and + // <lb2, ub2, stride2>, return true only if the sections + // are known to be disjoint. + // + // For example, for any positive constant C: + // X:Y does not overlap with (Y+C):Z + // X:Y does not overlap with Z:(X-C) + static bool areDisjointSections(const SectionDesc &desc1, + const SectionDesc &desc2) { + auto [lb1, ub1] = getOrderedBounds(desc1); + auto [lb2, ub2] = getOrderedBounds(desc2); + if (!lb1 || !lb2) + return false; + // Note that this comparison must be made on the ordered bounds, + // otherwise 'a(x:y:1) = a(z:x-1:-1) + 1' may be incorrectly treated + // as not overlapping (x=2, y=10, z=9). + if (isLess(ub1, lb2) || isLess(ub2, lb1)) + return true; + return false; + } + + // Given two array sections <lb1, ub1, stride1> and + // <lb2, ub2, stride2>, return true only if the sections + // are known to be identical. + // + // For example: + // <x, x, stride> + // <x, nullptr, nullptr> + // + // These sections are identical, from the point of which array + // elements are being addresses, even though the shape + // of the array slices might be different. + static bool areIdenticalSections(const SectionDesc &desc1, + const SectionDesc &desc2) { + if (desc1 == desc2) + return true; + return false; + } + + // Return true, if v1 is known to be less than v2. + static bool isLess(mlir::Value v1, mlir::Value v2); +}; + +ArraySectionAnalyzer::SlicesOverlapKind +ArraySectionAnalyzer::analyze(mlir::Value ref1, mlir::Value ref2) { if (ref1 == ref2) - return true; + return SlicesOverlapKind::DefinitelyIdentical; auto des1 = ref1.getDefiningOp<hlfir::DesignateOp>(); auto des2 = ref2.getDefiningOp<hlfir::DesignateOp>(); // We only support a pair of designators right now. if (!des1 || !des2) - return false; + return SlicesOverlapKind::Unknown; if (des1.getMemref() != des2.getMemref()) { // If the bases are different, then there is unknown overlap. LLVM_DEBUG(llvm::dbgs() << "No identical base for:\n" << des1 << "and:\n" << des2 << "\n"); - return false; + return SlicesOverlapKind::Unknown; } // Require all components of the designators to be the same. @@ -194,104 +328,105 @@ static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) { LLVM_DEBUG(llvm::dbgs() << "Different designator specs for:\n" << des1 << "and:\n" << des2 << "\n"); - return false; - } - - if (des1.getIsTriplet() != des2.getIsTriplet()) { - LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n" - << des1 << "and:\n" - << des2 << "\n"); - return false; + return SlicesOverlapKind::Unknown; } // Analyze the subscripts. - // For example: - // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) shape %9 - // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) shape %9 - // - // If all the triplets (section speficiers) are the same, then - // we do not care if %0 is equal to %1 - the slices are either - // identical or completely disjoint. auto des1It = des1.getIndices().begin(); auto des2It = des2.getIndices().begin(); bool identicalTriplets = true; - for (bool isTriplet : des1.getIsTriplet()) { - if (isTriplet) { - for (int i = 0; i < 3; ++i) - if (*des1It++ != *des2It++) { - LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n" - << des1 << "and:\n" - << des2 << "\n"); - identicalTriplets = false; - break; - } - } else { - ++des1It; - ++des2It; + bool identicalIndices = true; + for (auto [isTriplet1, isTriplet2] : + llvm::zip(des1.getIsTriplet(), des2.getIsTriplet())) { + SectionDesc desc1 = readSectionDesc(des1It, isTriplet1); + SectionDesc desc2 = readSectionDesc(des2It, isTriplet2); + + // See if we can prove that any of the sections do not overlap. + // This is mostly a Polyhedron/nf performance hack that looks for + // particular relations between the lower and upper bounds + // of the array sections, e.g. for any positive constant C: + // X:Y does not overlap with (Y+C):Z + // X:Y does not overlap with Z:(X-C) + if (areDisjointSections(desc1, desc2)) + return SlicesOverlapKind::DefinitelyDisjoint; + + if (!areIdenticalSections(desc1, desc2)) { + if (isTriplet1 || isTriplet2) { + // For example: + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) + // + // If all the triplets (section speficiers) are the same, then + // we do not care if %0 is equal to %1 - the slices are either + // identical or completely disjoint. + // + // Also, treat these as identical sections: + // hlfir.designate %6#0 (%c2:%c2:%c1) + // hlfir.designate %6#0 (%c2) + identicalTriplets = false; + LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n" + << des1 << "and:\n" + << des2 << "\n"); + } else { + identicalIndices = false; + LLVM_DEBUG(llvm::dbgs() << "Indices mismatch for:\n" + << des1 << "and:\n" + << des2 << "\n"); + } } } - if (identicalTriplets) - return true; - // See if we can prove that any of the triplets do not overlap. - // This is mostly a Polyhedron/nf performance hack that looks for - // particular relations between the lower and upper bounds - // of the array sections, e.g. for any positive constant C: - // X:Y does not overlap with (Y+C):Z - // X:Y does not overlap with Z:(X-C) - auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) { - auto removeConvert = [](mlir::Value v) -> mlir::Operation * { - auto *op = v.getDefiningOp(); - while (auto conv = mlir::dyn_cast_or_null<fir::ConvertOp>(op)) - op = conv.getValue().getDefiningOp(); - return op; - }; + if (identicalTriplets) { + if (identicalIndices) + return SlicesOverlapKind::DefinitelyIdentical; + else + return SlicesOverlapKind::EitherIdenticalOrDisjoint; + } - auto isPositiveConstant = [](mlir::Value v) -> bool { - if (auto conOp = - mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp())) - if (auto iattr = mlir::dyn_cast<mlir::IntegerAttr>(conOp.getValue())) - return iattr.getInt() > 0; - return false; - }; + LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return SlicesOverlapKind::Unknown; +} - auto *op1 = removeConvert(v1); - auto *op2 = removeConvert(v2); - if (!op1 || !op2) - return false; - if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2)) - if ((addi.getLhs().getDefiningOp() == op1 && - isPositiveConstant(addi.getRhs())) || - (addi.getRhs().getDefiningOp() == op1 && - isPositiveConstant(addi.getLhs()))) - return true; - if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1)) - if (subi.getLhs().getDefiningOp() == op2 && - isPositiveConstant(subi.getRhs())) - return true; +bool ArraySectionAnalyzer::isLess(mlir::Value v1, mlir::Value v2) { + auto removeConvert = [](mlir::Value v) -> mlir::Operation * { + auto *op = v.getDefiningOp(); + while (auto conv = mlir::dyn_cast_or_null<fir::ConvertOp>(op)) + op = conv.getValue().getDefiningOp(); + return op; + }; + + auto isPositiveConstant = [](mlir::Value v) -> bool { + if (auto val = fir::getIntIfConstant(v)) + return *val > 0; return false; }; - des1It = des1.getIndices().begin(); - des2It = des2.getIndices().begin(); - for (bool isTriplet : des1.getIsTriplet()) { - if (isTriplet) { - mlir::Value des1Lb = *des1It++; - mlir::Value des1Ub = *des1It++; - mlir::Value des2Lb = *des2It++; - mlir::Value des2Ub = *des2It++; - // Ignore strides. - ++des1It; - ++des2It; - if (displacedByConstant(des1Ub, des2Lb) || - displacedByConstant(des2Ub, des1Lb)) - return true; - } else { - ++des1It; - ++des2It; - } - } + auto *op1 = removeConvert(v1); + auto *op2 = removeConvert(v2); + if (!op1 || !op2) + return false; + // Check if they are both constants. + if (auto val1 = fir::getIntIfConstant(op1->getResult(0))) + if (auto val2 = fir::getIntIfConstant(op2->getResult(0))) + return *val1 < *val2; + + // Handle some variable cases (C > 0): + // v2 = v1 + C + // v2 = C + v1 + // v1 = v2 - C + if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2)) + if ((addi.getLhs().getDefiningOp() == op1 && + isPositiveConstant(addi.getRhs())) || + (addi.getRhs().getDefiningOp() == op1 && + isPositiveConstant(addi.getLhs()))) + return true; + if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1)) + if (subi.getLhs().getDefiningOp() == op2 && + isPositiveConstant(subi.getRhs())) + return true; return false; } @@ -328,44 +463,36 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { match.array = match.assign.getLhs(); mlir::Type arrayType = mlir::dyn_cast<fir::SequenceType>( fir::unwrapPassByRefType(match.array.getType())); - if (!arrayType) + if (!arrayType) { + LLVM_DEBUG(llvm::dbgs() << "AssignOp's result is not an array\n"); return std::nullopt; + } // require that the array elements are trivial // TODO: this is just to make the pass easier to think about. Not an inherent // limitation mlir::Type eleTy = hlfir::getFortranElementType(arrayType); - if (!fir::isa_trivial(eleTy)) + if (!fir::isa_trivial(eleTy)) { + LLVM_DEBUG(llvm::dbgs() << "AssignOp's data type is not trivial\n"); return std::nullopt; + } - // the array must have the same shape as the elemental. CSE should have - // deduplicated the fir.shape operations where they are provably the same - // so we just have to check for the same ssa value - // TODO: add more ways of getting the shape of the array - mlir::Value arrayShape; - if (match.array.getDefiningOp()) - arrayShape = - mlir::TypeSwitch<mlir::Operation *, mlir::Value>( - match.array.getDefiningOp()) - .Case([](hlfir::DesignateOp designate) { - return designate.getShape(); - }) - .Case([](hlfir::DeclareOp declare) { return declare.getShape(); }) - .Default([](mlir::Operation *) { return mlir::Value{}; }); - if (!arrayShape) { - LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at " - << elemental->getLoc() << "\n"); + // The array must have the same shape as the elemental. + // + // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be + // conformable unless the lhs is an allocatable array. In HLFIR we can + // see this from the presence or absence of the realloc attribute on + // hlfir.assign. If it is not a realloc assignment, we can trust that + // the shapes do conform. + // + // TODO: the lhs's shape is dynamic, so it is hard to prove that + // there is no reallocation of the lhs due to the assignment. + // We can probably try generating multiple versions of the code + // with checking for the shape match, length parameters match, etc. + if (match.assign.isAllocatableAssignment()) { + LLVM_DEBUG(llvm::dbgs() << "AssignOp may involve (re)allocation of LHS\n"); return std::nullopt; } - if (arrayShape != elemental.getShape()) { - // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be - // conformable unless the lhs is an allocatable array. In HLFIR we can - // see this from the presence or absence of the realloc attribute on - // hlfir.assign. If it is not a realloc assignment, we can trust that - // the shapes do conform - if (match.assign.getRealloc()) - return std::nullopt; - } // the transformation wants to apply the elemental in a do-loop at the // hlfir.assign, check there are no effects which make this unsafe @@ -419,21 +546,27 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { if (!res.isPartial()) { if (auto designate = effect.getValue().getDefiningOp<hlfir::DesignateOp>()) { - if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) { + ArraySectionAnalyzer::SlicesOverlapKind overlap = + ArraySectionAnalyzer::analyze(match.array, designate.getMemref()); + if (overlap == + ArraySectionAnalyzer::SlicesOverlapKind::DefinitelyDisjoint) + continue; + + if (overlap == ArraySectionAnalyzer::SlicesOverlapKind::Unknown) { LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate << " at " << elemental.getLoc() << "\n"); return std::nullopt; } auto indices = designate.getIndices(); auto elementalIndices = elemental.getIndices(); - if (indices.size() != elementalIndices.size()) { - LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate - << " at " << elemental.getLoc() << "\n"); - return std::nullopt; - } - if (std::equal(indices.begin(), indices.end(), elementalIndices.begin(), + if (indices.size() == elementalIndices.size() && + std::equal(indices.begin(), indices.end(), elementalIndices.begin(), elementalIndices.end())) continue; + + LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate + << " at " << elemental.getLoc() << "\n"); + return std::nullopt; } } LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue() @@ -1239,7 +1372,7 @@ public: // patterns.insert<ReductionMaskConversion<hlfir::MaxvalOp>>(context); // patterns.insert<ReductionMaskConversion<hlfir::MinvalOp>>(context); - if (mlir::failed(mlir::applyPatternsAndFoldGreedily( + if (mlir::failed(mlir::applyPatternsGreedily( getOperation(), std::move(patterns), config))) { mlir::emitError(getOperation()->getLoc(), "failure in HLFIR optimized bufferization"); |
