summaryrefslogtreecommitdiff
path: root/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp')
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp468
1 files changed, 412 insertions, 56 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index a4c2641fe631..f638d391d55c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+#include <numeric>
+
#include "CIRGenOpenACCRecipe.h"
namespace clang::CIRGen {
@@ -34,11 +36,194 @@ mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region &region,
llvm::SmallVector<mlir::Location> locs{types.size(), loc};
return builder.createBlock(&region, region.end(), types, locs);
}
+void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc,
+ mlir::Type copyType,
+ mlir::Value numEltsToCopy,
+ mlir::Value offsetPerSubarray,
+ mlir::Value destAlloca,
+ mlir::Value srcAlloca) {
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+
+ mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
+ auto itrPtrTy = cir::PointerType::get(itrTy);
+ mlir::IntegerAttr itrAlign =
+ cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars(
+ cgf.getContext().UnsignedLongLongTy));
+
+ auto loopBuilder = [&]() {
+ auto itr =
+ cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", itrAlign);
+ cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0);
+ builder.CIRBaseBuilderTy::createStore(loc, constZero, itr);
+ builder.createFor(
+ loc,
+ /*condBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ // itr < numEltsToCopy
+ // Enforce a trip count of 1 if there wasn't any element count, this
+ // way we can just use this loop with a constant bounds instead of a
+ // separate code path.
+ if (!numEltsToCopy)
+ numEltsToCopy = builder.getConstInt(loc, itrTy, 1);
+
+ auto loadCur = cir::LoadOp::create(builder, loc, {itr});
+ auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadCur,
+ numEltsToCopy);
+ builder.createCondition(cmp);
+ },
+ /*bodyBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ // destAlloca[itr] = srcAlloca[offsetPerSubArray * itr];
+ auto loadCur = cir::LoadOp::create(builder, loc, {itr});
+ auto srcOffset = builder.createMul(loc, offsetPerSubarray, loadCur);
+
+ auto ptrToOffsetIntoSrc = cir::PtrStrideOp::create(
+ builder, loc, copyType, srcAlloca, srcOffset);
+
+ auto offsetIntoDecayDest = cir::PtrStrideOp::create(
+ builder, loc, builder.getPointerTo(copyType), destAlloca,
+ loadCur);
+
+ builder.CIRBaseBuilderTy::createStore(loc, ptrToOffsetIntoSrc,
+ offsetIntoDecayDest);
+ builder.createYield(loc);
+ },
+ /*stepBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ // Simple increment of the iterator.
+ auto load = cir::LoadOp::create(builder, loc, {itr});
+ auto inc = cir::UnaryOp::create(builder, loc, load.getType(),
+ cir::UnaryOpKind::Inc, load);
+ builder.CIRBaseBuilderTy::createStore(loc, inc, itr);
+ builder.createYield(loc);
+ });
+ };
+
+ cir::ScopeOp::create(builder, loc,
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ loopBuilder();
+ builder.createYield(loc);
+ });
+}
+
+mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
+ mlir::Block *block, SourceRange exprRange, mlir::Location loc,
+ std::string_view allocaName, size_t numBounds,
+ llvm::ArrayRef<QualType> boundTypes) {
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+
+ // Get the range of bounds arguments, which are all but the 1st arg.
+ llvm::ArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(1);
+
+ // boundTypes contains the before and after of each bounds, so it ends up
+ // having 1 extra. Assert this is the case to ensure we don't call this in the
+ // wrong 'block'.
+ assert(boundsRange.size() + 1 == boundTypes.size());
+
+ mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
+ auto idxType = mlir::IndexType::get(&cgf.getMLIRContext());
+
+ auto getUpperBound = [&](mlir::Value bound) {
+ auto upperBoundVal =
+ mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound);
+ return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy,
+ upperBoundVal.getResult())
+ .getResult(0);
+ };
+
+ auto isArrayTy = [&](QualType ty) {
+ if (ty->isArrayType() && !ty->isConstantArrayType())
+ cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs");
+ return ty->isConstantArrayType();
+ };
+
+ mlir::Type topLevelTy = cgf.convertType(boundTypes.back());
+ cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy);
+ // Do an alloca for the 'top' level type without bounds.
+ mlir::Value initialAlloca = builder.createAlloca(
+ loc, topLevelTyPtr, topLevelTy, allocaName,
+ cgf.getContext().getTypeAlignInChars(boundTypes.back()));
+
+ bool lastBoundWasArray = isArrayTy(boundTypes.back());
+
+ // Make sure we track a moving version of this so we can get our
+ // 'copying' back to correct.
+ mlir::Value lastAlloca = initialAlloca;
+
+ // Since we're iterating the types in reverse, this sets up for each index
+ // corresponding to the boundsRange to be the 'after application of the
+ // bounds.
+ llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back(1);
+
+ // Collect the 'do we have any allocas needed after this type' list.
+ llvm::SmallVector<bool> allocasLeftArr;
+ llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front();
+ std::transform_inclusive_scan(
+ resultTypes.begin(), resultTypes.end(),
+ std::back_inserter(allocasLeftArr), std::plus<bool>{},
+ [](QualType ty) { return !ty->isConstantArrayType(); }, false);
+
+ // Keep track of the number of 'elements' that we're allocating. Individual
+ // allocas should multiply this by the size of its current allocation.
+ mlir::Value cumulativeElts;
+ for (auto [bound, resultType, allocasLeft] : llvm::reverse(
+ llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) {
+
+ // if there is no further 'alloca' operation we need to do, we can skip
+ // creating the UB/multiplications/etc.
+ if (!allocasLeft)
+ break;
+
+ // First: figure out the number of elements in the current 'bound' list.
+ mlir::Value eltsPerSubArray = getUpperBound(bound);
+ mlir::Value eltsToAlloca;
+
+ // IF we are in a sub-bounds, the total number of elements to alloca is
+ // the product of that one and the current 'bounds' size. That is,
+ // arr[5][5], we would need 25 elements, not just 5. Else it is just the
+ // current number of elements.
+ if (cumulativeElts)
+ eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts);
+ else
+ eltsToAlloca = eltsPerSubArray;
+
+ if (!lastBoundWasArray) {
+ // If we have to do an allocation, figure out the size of the
+ // allocation. alloca takes the number of bytes, not elements.
+ TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType);
+ cir::ConstantOp eltSize = builder.getConstInt(
+ loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity());
+ mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize);
+
+ mlir::Type eltTy = cgf.convertType(resultType);
+ cir::PointerType ptrTy = builder.getPointerTo(eltTy);
+ mlir::Value curAlloca = builder.createAlloca(
+ loc, ptrTy, eltTy, "openacc.init.bounds",
+ cgf.getContext().getTypeAlignInChars(resultType), curSize);
+
+ makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca,
+ curAlloca);
+ lastAlloca = curAlloca;
+ } else {
+ // In the case of an array, we just need to decay the pointer, so just do
+ // a zero-offset stride on the last alloca to decay it down an array
+ // level.
+ cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0);
+ lastAlloca = builder.getArrayElement(loc, loc, lastAlloca,
+ cgf.convertType(resultType),
+ constZero, /*shouldDecay=*/true);
+ }
+
+ cumulativeElts = eltsToAlloca;
+ lastBoundWasArray = isArrayTy(resultType);
+ }
+ return initialAlloca;
+}
-mlir::Value
-OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
- mlir::Value bound,
- mlir::Location loc, bool inverse) {
+std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop(
+ mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
+ mlir::Value bound, mlir::Location loc, bool inverse) {
mlir::Operation *bodyInsertLoc;
mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
@@ -54,7 +239,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
if (auto arrayTy = dyn_cast<cir::ArrayType>(eltTy))
return builder.getArrayElement(loc, loc, subVal, arrayTy.getElementType(),
- idxLoad.getResult(),
+ idxLoad,
/*shouldDecay=*/true);
assert(isa<cir::PointerType>(eltTy));
@@ -62,8 +247,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
auto eltLoad = cir::LoadOp::create(builder, loc, {subVal});
return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad,
- idxLoad.getResult())
- .getResult();
+ idxLoad);
};
auto forStmtBuilder = [&]() {
@@ -85,12 +269,11 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
if (inverse) {
cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1);
- auto sub =
- cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub,
- ubConversion.getResult(0), constOne.getResult());
+ auto sub = cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub,
+ ubConversion.getResult(0), constOne);
// Upperbound is exclusive, so subtract 1.
- builder.CIRBaseBuilderTy::createStore(loc, sub.getResult(), itr);
+ builder.CIRBaseBuilderTy::createStore(loc, sub, itr);
} else {
// Lowerbound is inclusive, so we can include it.
builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0),
@@ -108,8 +291,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
auto loadCur = cir::LoadOp::create(builder, loc, {itr});
// Use 'not equal' since we are just doing an increment/decrement.
auto cmp = builder.createCompare(
- loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt,
- loadCur.getResult(), endItr.getResult(0));
+ loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur,
+ endItr.getResult(0));
builder.createCondition(cmp);
},
/*bodyBuilder=*/
@@ -118,16 +301,17 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
if (subscriptedValue)
subscriptedValue = doSubscriptOp(subscriptedValue, load);
+ if (subscriptedValue2)
+ subscriptedValue2 = doSubscriptOp(subscriptedValue2, load);
bodyInsertLoc = builder.createYield(loc);
},
/*stepBuilder=*/
[&](mlir::OpBuilder &b, mlir::Location loc) {
auto load = cir::LoadOp::create(builder, loc, {itr});
- auto unary = cir::UnaryOp::create(builder, loc, load.getType(),
- inverse ? cir::UnaryOpKind::Dec
- : cir::UnaryOpKind::Inc,
- load.getResult());
- builder.CIRBaseBuilderTy::createStore(loc, unary.getResult(), itr);
+ auto unary = cir::UnaryOp::create(
+ builder, loc, load.getType(),
+ inverse ? cir::UnaryOpKind::Dec : cir::UnaryOpKind::Inc, load);
+ builder.CIRBaseBuilderTy::createStore(loc, unary, itr);
builder.createYield(loc);
});
};
@@ -141,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
// Leave the insertion point to be inside the body, so we can loop over
// these things.
builder.setInsertionPoint(bodyInsertLoc);
- return subscriptedValue;
+ return {subscriptedValue, subscriptedValue2};
}
mlir::acc::ReductionOperator
@@ -214,25 +398,50 @@ void OpenACCRecipeBuilderBase::createRecipeDestroySection(
emitDestroy(block->getArgument(1), elementTy);
}
+ ls.forceCleanup();
mlir::acc::YieldOp::create(builder, locEnd);
}
+void OpenACCRecipeBuilderBase::makeBoundsInit(
+ mlir::Value alloca, mlir::Location loc, mlir::Block *block,
+ const VarDecl *allocaDecl, QualType origType, bool isInitSection) {
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+ builder.setInsertionPointToEnd(block);
+ CIRGenFunction::LexicalScope ls(cgf, loc, block);
+
+ CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl};
+ tempDeclEmission.emittedAsOffload = true;
+
+ // The init section is the only one of the handful that only has a single
+ // argument for the 'type', so we have to drop 1 for init, and future calls
+ // to this will need to drop 2.
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(isInitSection ? 1 : 2);
+
+ mlir::Value subscriptedValue = alloca;
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc,
+ /*inverse=*/false);
+
+ tempDeclEmission.setAllocatedAddress(
+ Address{subscriptedValue, cgf.convertType(origType),
+ cgf.getContext().getDeclAlign(allocaDecl)});
+ cgf.emitAutoVarInit(tempDeclEmission);
+}
-// TODO: OpenACC: When we get this implemented for the reduction/firstprivate,
-// this might end up re-merging with createRecipeInitCopy. For now, keep it
-// separate until we're sure what everything looks like to keep this as clean
-// as possible.
-void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
+// TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we
+// probably need to do a little work about the 'init' calls to put it in 'copy'
+// region instead.
+void OpenACCRecipeBuilderBase::createInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
- mlir::Value mainOp, mlir::acc::PrivateRecipeOp recipe, size_t numBounds,
+ mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
- QualType origType, const Expr *initExpr) {
+ QualType origType, bool emitInitExpr) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
- mlir::Block *block =
- createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc,
- numBounds, /*isInit=*/true);
- builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
+ mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(),
+ loc, numBounds, /*isInit=*/true);
+ builder.setInsertionPointToEnd(&recipeInitRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
const Type *allocaPointeeType =
@@ -248,7 +457,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
// Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
// emit an error to tell us. However, emitting those errors during
// production is a violation of the standard, so we cannot do them.
- cgf.cgm.errorNYI(exprRange, "private default-init recipe");
+ cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe");
}
if (!numBounds) {
@@ -256,61 +465,208 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
// initialize this variable correctly.
CIRGenFunction::AutoVarEmission tempDeclEmission =
cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
- cgf.emitAutoVarInit(tempDeclEmission);
+ if (emitInitExpr)
+ cgf.emitAutoVarInit(tempDeclEmission);
} else {
- cgf.cgm.errorNYI(exprRange, "private-init with bounds");
+ mlir::Value alloca = makeBoundsAlloca(
+ block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
+
+ // If the initializer is trivial, there is nothing to do here, so save
+ // ourselves some effort.
+ if (emitInitExpr && allocaDecl->getInit() &&
+ (!cgf.isTrivialInitializer(allocaDecl->getInit()) ||
+ cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
+ LangOptions::TrivialAutoVarInitKind::Uninitialized))
+ makeBoundsInit(alloca, loc, block, allocaDecl, origType,
+ /*isInitSection=*/true);
}
+ ls.forceCleanup();
mlir::acc::YieldOp::create(builder, locEnd);
}
void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- CIRGenFunction::AutoVarEmission tempDeclEmission,
- mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe,
- const VarDecl *temporary) {
- mlir::Block *block =
- createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc,
- /*numBounds=*/0, /*isInit=*/false);
- builder.setInsertionPointToEnd(&recipe.getCopyRegion().back());
+ const VarDecl *allocaDecl, const VarDecl *temporary,
+ mlir::Region &copyRegion, size_t numBounds) {
+ mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
+ builder.setInsertionPointToEnd(&copyRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
- mlir::BlockArgument fromArg = block->getArgument(0);
- mlir::BlockArgument toArg = block->getArgument(1);
+ mlir::Value fromArg = block->getArgument(0);
+ mlir::Value toArg = block->getArgument(1);
- mlir::Type elementTy =
- mlir::cast<cir::PointerType>(mainOp.getType()).getPointee();
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(2);
+
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ std::tie(fromArg, toArg) =
+ createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false);
- // Set the address of the emission to be the argument, so that we initialize
- // that instead of the variable in the other block.
+ // Set up the 'to' address.
+ mlir::Type elementTy =
+ mlir::cast<cir::PointerType>(toArg.getType()).getPointee();
+ CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl);
+ tempDeclEmission.emittedAsOffload = true;
tempDeclEmission.setAllocatedAddress(
- Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
- tempDeclEmission.EmittedAsOffload = true;
+ Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
+ // Set up the 'from' address from the temporary.
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary};
cgf.setAddrOfLocalVar(
temporary,
- Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
-
+ Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
cgf.emitAutoVarInit(tempDeclEmission);
+
+ builder.setInsertionPointToEnd(&copyRegion.back());
+ ls.forceCleanup();
mlir::acc::YieldOp::create(builder, locEnd);
}
+
// This function generates the 'combiner' section for a reduction recipe. Note
// that this function is not 'insertion point' clean, in that it alters the
// insertion point to be inside of the 'combiner' section of the recipe, but
// doesn't restore it aftewards.
void OpenACCRecipeBuilderBase::createReductionRecipeCombiner(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- mlir::acc::ReductionRecipeOp recipe) {
- mlir::Block *block = builder.createBlock(
- &recipe.getCombinerRegion(), recipe.getCombinerRegion().end(),
- {mainOp.getType(), mainOp.getType()}, {loc, loc});
+ mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType,
+ llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) {
+ mlir::Block *block =
+ createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
- mlir::BlockArgument lhsArg = block->getArgument(0);
+ mlir::Value lhsArg = block->getArgument(0);
+ mlir::Value rhsArg = block->getArgument(1);
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(2);
+
+ if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) {
+ cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated");
+ mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
+ return;
+ }
+
+ // apply the bounds so that we can get our bounds emitted correctly.
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ std::tie(lhsArg, rhsArg) =
+ createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false);
+
+ // Emitter for when we know this isn't a struct or array we have to loop
+ // through. This should work for the 'field' once the get-element call has
+ // been made.
+ auto emitSingleCombiner =
+ [&](mlir::Value lhsArg, mlir::Value rhsArg,
+ const OpenACCReductionRecipe::CombinerRecipe &combiner) {
+ mlir::Type elementTy =
+ mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee();
+ CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS};
+ cgf.setAddrOfLocalVar(
+ combiner.LHS, Address{lhsArg, elementTy,
+ cgf.getContext().getDeclAlign(combiner.LHS)});
+ CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS};
+ cgf.setAddrOfLocalVar(
+ combiner.RHS, Address{rhsArg, elementTy,
+ cgf.getContext().getDeclAlign(combiner.RHS)});
+
+ [[maybe_unused]] mlir::LogicalResult stmtRes =
+ cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true);
+ };
+
+ // Emitter for when we know this is either a non-array or element of an array
+ // (which also shouldn't be an array type?). This function should generate the
+ // initialization code for an entire 'array-element'/non-array, including
+ // diving into each element of a struct (if necessary).
+ auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) {
+ assert(!ty->isArrayType() && "Array type shouldn't get here");
+ if (const auto *rd = ty->getAsRecordDecl()) {
+ if (combinerRecipes.size() == 1 &&
+ cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) {
+ // If this is a 'top level' operator on the type we can just emit this
+ // as a simple one.
+ emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
+ } else {
+ // else we have to handle each individual field after after a
+ // get-element.
+ for (const auto &[field, combiner] :
+ llvm::zip_equal(rd->fields(), combinerRecipes)) {
+ mlir::Type fieldType = cgf.convertType(field->getType());
+ auto fieldPtr = cir::PointerType::get(fieldType);
+
+ mlir::Value lhsField = builder.createGetMember(
+ loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex());
+ mlir::Value rhsField = builder.createGetMember(
+ loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex());
+
+ emitSingleCombiner(lhsField, rhsField, combiner);
+ }
+ }
+
+ } else {
+ // if this is a single-thing (because we should know this isn't an array,
+ // as Sema wouldn't let us get here), we can just do a normal emit call.
+ emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]);
+ }
+ };
+
+ if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) {
+ // If we're in an array, we have to emit the combiner for each element of
+ // the array.
+ auto itrTy = mlir::cast<cir::IntType>(cgf.PtrDiffTy);
+ auto itrPtrTy = cir::PointerType::get(itrTy);
- mlir::acc::YieldOp::create(builder, locEnd, lhsArg);
+ mlir::Value zero =
+ builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), 0);
+ mlir::Value itr =
+ cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr",
+ cgf.cgm.getSize(cgf.getPointerAlign()));
+ builder.CIRBaseBuilderTy::createStore(loc, zero, itr);
+
+ builder.setInsertionPointAfter(builder.createFor(
+ loc,
+ /*condBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+ mlir::Value arraySize = builder.getConstInt(
+ loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), cat->getZExtSize());
+ auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr,
+ arraySize);
+ builder.createCondition(cmp);
+ },
+ /*bodyBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+ auto lhsElt = builder.getArrayElement(
+ loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr,
+ /*shouldDecay=*/true);
+ auto rhsElt = builder.getArrayElement(
+ loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr,
+ /*shouldDecay=*/true);
+
+ emitCombiner(lhsElt, rhsElt, cat->getElementType());
+ builder.createYield(loc);
+ },
+ /*stepBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ auto loadItr = cir::LoadOp::create(builder, loc, {itr});
+ auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(),
+ cir::UnaryOpKind::Inc, loadItr);
+ builder.CIRBaseBuilderTy::createStore(loc, inc, itr);
+ builder.createYield(loc);
+ }));
+
+ } else if (origType->isArrayType()) {
+ cgf.cgm.errorNYI(loc,
+ "OpenACC Reduction combiner non-constant array recipe");
+ } else {
+ emitCombiner(lhsArg, rhsArg, origType);
+ }
+
+ builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
+ ls.forceCleanup();
+ mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
}
} // namespace clang::CIRGen