diff options
Diffstat (limited to 'clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp')
| -rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp | 468 |
1 files changed, 412 insertions, 56 deletions
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index a4c2641fe631..f638d391d55c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include <numeric> + #include "CIRGenOpenACCRecipe.h" namespace clang::CIRGen { @@ -34,11 +36,194 @@ mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region ®ion, llvm::SmallVector<mlir::Location> locs{types.size(), loc}; return builder.createBlock(®ion, region.end(), types, locs); } +void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc, + mlir::Type copyType, + mlir::Value numEltsToCopy, + mlir::Value offsetPerSubarray, + mlir::Value destAlloca, + mlir::Value srcAlloca) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + + mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); + auto itrPtrTy = cir::PointerType::get(itrTy); + mlir::IntegerAttr itrAlign = + cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars( + cgf.getContext().UnsignedLongLongTy)); + + auto loopBuilder = [&]() { + auto itr = + cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", itrAlign); + cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); + builder.CIRBaseBuilderTy::createStore(loc, constZero, itr); + builder.createFor( + loc, + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // itr < numEltsToCopy + // Enforce a trip count of 1 if there wasn't any element count, this + // way we can just use this loop with a constant bounds instead of a + // separate code path. + if (!numEltsToCopy) + numEltsToCopy = builder.getConstInt(loc, itrTy, 1); + + auto loadCur = cir::LoadOp::create(builder, loc, {itr}); + auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadCur, + numEltsToCopy); + builder.createCondition(cmp); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // destAlloca[itr] = srcAlloca[offsetPerSubArray * itr]; + auto loadCur = cir::LoadOp::create(builder, loc, {itr}); + auto srcOffset = builder.createMul(loc, offsetPerSubarray, loadCur); + + auto ptrToOffsetIntoSrc = cir::PtrStrideOp::create( + builder, loc, copyType, srcAlloca, srcOffset); + + auto offsetIntoDecayDest = cir::PtrStrideOp::create( + builder, loc, builder.getPointerTo(copyType), destAlloca, + loadCur); + + builder.CIRBaseBuilderTy::createStore(loc, ptrToOffsetIntoSrc, + offsetIntoDecayDest); + builder.createYield(loc); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Simple increment of the iterator. + auto load = cir::LoadOp::create(builder, loc, {itr}); + auto inc = cir::UnaryOp::create(builder, loc, load.getType(), + cir::UnaryOpKind::Inc, load); + builder.CIRBaseBuilderTy::createStore(loc, inc, itr); + builder.createYield(loc); + }); + }; + + cir::ScopeOp::create(builder, loc, + [&](mlir::OpBuilder &b, mlir::Location loc) { + loopBuilder(); + builder.createYield(loc); + }); +} + +mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( + mlir::Block *block, SourceRange exprRange, mlir::Location loc, + std::string_view allocaName, size_t numBounds, + llvm::ArrayRef<QualType> boundTypes) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + + // Get the range of bounds arguments, which are all but the 1st arg. + llvm::ArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(1); + + // boundTypes contains the before and after of each bounds, so it ends up + // having 1 extra. Assert this is the case to ensure we don't call this in the + // wrong 'block'. + assert(boundsRange.size() + 1 == boundTypes.size()); + + mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); + auto idxType = mlir::IndexType::get(&cgf.getMLIRContext()); + + auto getUpperBound = [&](mlir::Value bound) { + auto upperBoundVal = + mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); + return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy, + upperBoundVal.getResult()) + .getResult(0); + }; + + auto isArrayTy = [&](QualType ty) { + if (ty->isArrayType() && !ty->isConstantArrayType()) + cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs"); + return ty->isConstantArrayType(); + }; + + mlir::Type topLevelTy = cgf.convertType(boundTypes.back()); + cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy); + // Do an alloca for the 'top' level type without bounds. + mlir::Value initialAlloca = builder.createAlloca( + loc, topLevelTyPtr, topLevelTy, allocaName, + cgf.getContext().getTypeAlignInChars(boundTypes.back())); + + bool lastBoundWasArray = isArrayTy(boundTypes.back()); + + // Make sure we track a moving version of this so we can get our + // 'copying' back to correct. + mlir::Value lastAlloca = initialAlloca; + + // Since we're iterating the types in reverse, this sets up for each index + // corresponding to the boundsRange to be the 'after application of the + // bounds. + llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back(1); + + // Collect the 'do we have any allocas needed after this type' list. + llvm::SmallVector<bool> allocasLeftArr; + llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front(); + std::transform_inclusive_scan( + resultTypes.begin(), resultTypes.end(), + std::back_inserter(allocasLeftArr), std::plus<bool>{}, + [](QualType ty) { return !ty->isConstantArrayType(); }, false); + + // Keep track of the number of 'elements' that we're allocating. Individual + // allocas should multiply this by the size of its current allocation. + mlir::Value cumulativeElts; + for (auto [bound, resultType, allocasLeft] : llvm::reverse( + llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) { + + // if there is no further 'alloca' operation we need to do, we can skip + // creating the UB/multiplications/etc. + if (!allocasLeft) + break; + + // First: figure out the number of elements in the current 'bound' list. + mlir::Value eltsPerSubArray = getUpperBound(bound); + mlir::Value eltsToAlloca; + + // IF we are in a sub-bounds, the total number of elements to alloca is + // the product of that one and the current 'bounds' size. That is, + // arr[5][5], we would need 25 elements, not just 5. Else it is just the + // current number of elements. + if (cumulativeElts) + eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts); + else + eltsToAlloca = eltsPerSubArray; + + if (!lastBoundWasArray) { + // If we have to do an allocation, figure out the size of the + // allocation. alloca takes the number of bytes, not elements. + TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType); + cir::ConstantOp eltSize = builder.getConstInt( + loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity()); + mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize); + + mlir::Type eltTy = cgf.convertType(resultType); + cir::PointerType ptrTy = builder.getPointerTo(eltTy); + mlir::Value curAlloca = builder.createAlloca( + loc, ptrTy, eltTy, "openacc.init.bounds", + cgf.getContext().getTypeAlignInChars(resultType), curSize); + + makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca, + curAlloca); + lastAlloca = curAlloca; + } else { + // In the case of an array, we just need to decay the pointer, so just do + // a zero-offset stride on the last alloca to decay it down an array + // level. + cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); + lastAlloca = builder.getArrayElement(loc, loc, lastAlloca, + cgf.convertType(resultType), + constZero, /*shouldDecay=*/true); + } + + cumulativeElts = eltsToAlloca; + lastBoundWasArray = isArrayTy(resultType); + } + return initialAlloca; +} -mlir::Value -OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, - mlir::Value bound, - mlir::Location loc, bool inverse) { +std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop( + mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse) { mlir::Operation *bodyInsertLoc; mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); @@ -54,7 +239,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (auto arrayTy = dyn_cast<cir::ArrayType>(eltTy)) return builder.getArrayElement(loc, loc, subVal, arrayTy.getElementType(), - idxLoad.getResult(), + idxLoad, /*shouldDecay=*/true); assert(isa<cir::PointerType>(eltTy)); @@ -62,8 +247,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, auto eltLoad = cir::LoadOp::create(builder, loc, {subVal}); return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, - idxLoad.getResult()) - .getResult(); + idxLoad); }; auto forStmtBuilder = [&]() { @@ -85,12 +269,11 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (inverse) { cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1); - auto sub = - cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub, - ubConversion.getResult(0), constOne.getResult()); + auto sub = cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub, + ubConversion.getResult(0), constOne); // Upperbound is exclusive, so subtract 1. - builder.CIRBaseBuilderTy::createStore(loc, sub.getResult(), itr); + builder.CIRBaseBuilderTy::createStore(loc, sub, itr); } else { // Lowerbound is inclusive, so we can include it. builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0), @@ -108,8 +291,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, auto loadCur = cir::LoadOp::create(builder, loc, {itr}); // Use 'not equal' since we are just doing an increment/decrement. auto cmp = builder.createCompare( - loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, - loadCur.getResult(), endItr.getResult(0)); + loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur, + endItr.getResult(0)); builder.createCondition(cmp); }, /*bodyBuilder=*/ @@ -118,16 +301,17 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (subscriptedValue) subscriptedValue = doSubscriptOp(subscriptedValue, load); + if (subscriptedValue2) + subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); bodyInsertLoc = builder.createYield(loc); }, /*stepBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto load = cir::LoadOp::create(builder, loc, {itr}); - auto unary = cir::UnaryOp::create(builder, loc, load.getType(), - inverse ? cir::UnaryOpKind::Dec - : cir::UnaryOpKind::Inc, - load.getResult()); - builder.CIRBaseBuilderTy::createStore(loc, unary.getResult(), itr); + auto unary = cir::UnaryOp::create( + builder, loc, load.getType(), + inverse ? cir::UnaryOpKind::Dec : cir::UnaryOpKind::Inc, load); + builder.CIRBaseBuilderTy::createStore(loc, unary, itr); builder.createYield(loc); }); }; @@ -141,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, // Leave the insertion point to be inside the body, so we can loop over // these things. builder.setInsertionPoint(bodyInsertLoc); - return subscriptedValue; + return {subscriptedValue, subscriptedValue2}; } mlir::acc::ReductionOperator @@ -214,25 +398,50 @@ void OpenACCRecipeBuilderBase::createRecipeDestroySection( emitDestroy(block->getArgument(1), elementTy); } + ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } +void OpenACCRecipeBuilderBase::makeBoundsInit( + mlir::Value alloca, mlir::Location loc, mlir::Block *block, + const VarDecl *allocaDecl, QualType origType, bool isInitSection) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointToEnd(block); + CIRGenFunction::LexicalScope ls(cgf, loc, block); + + CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl}; + tempDeclEmission.emittedAsOffload = true; + + // The init section is the only one of the handful that only has a single + // argument for the 'type', so we have to drop 1 for init, and future calls + // to this will need to drop 2. + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(isInitSection ? 1 : 2); + + mlir::Value subscriptedValue = alloca; + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc, + /*inverse=*/false); + + tempDeclEmission.setAllocatedAddress( + Address{subscriptedValue, cgf.convertType(origType), + cgf.getContext().getDeclAlign(allocaDecl)}); + cgf.emitAutoVarInit(tempDeclEmission); +} -// TODO: OpenACC: When we get this implemented for the reduction/firstprivate, -// this might end up re-merging with createRecipeInitCopy. For now, keep it -// separate until we're sure what everything looks like to keep this as clean -// as possible. -void OpenACCRecipeBuilderBase::createPrivateInitRecipe( +// TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we +// probably need to do a little work about the 'init' calls to put it in 'copy' +// region instead. +void OpenACCRecipeBuilderBase::createInitRecipe( mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, - mlir::Value mainOp, mlir::acc::PrivateRecipeOp recipe, size_t numBounds, + mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl, - QualType origType, const Expr *initExpr) { + QualType origType, bool emitInitExpr) { assert(allocaDecl && "Required recipe variable not set?"); CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; - mlir::Block *block = - createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc, - numBounds, /*isInit=*/true); - builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); + mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(), + loc, numBounds, /*isInit=*/true); + builder.setInsertionPointToEnd(&recipeInitRegion.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); const Type *allocaPointeeType = @@ -248,7 +457,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe( // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll // emit an error to tell us. However, emitting those errors during // production is a violation of the standard, so we cannot do them. - cgf.cgm.errorNYI(exprRange, "private default-init recipe"); + cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe"); } if (!numBounds) { @@ -256,61 +465,208 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe( // initialize this variable correctly. CIRGenFunction::AutoVarEmission tempDeclEmission = cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); - cgf.emitAutoVarInit(tempDeclEmission); + if (emitInitExpr) + cgf.emitAutoVarInit(tempDeclEmission); } else { - cgf.cgm.errorNYI(exprRange, "private-init with bounds"); + mlir::Value alloca = makeBoundsAlloca( + block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); + + // If the initializer is trivial, there is nothing to do here, so save + // ourselves some effort. + if (emitInitExpr && allocaDecl->getInit() && + (!cgf.isTrivialInitializer(allocaDecl->getInit()) || + cgf.getContext().getLangOpts().getTrivialAutoVarInit() != + LangOptions::TrivialAutoVarInitKind::Uninitialized)) + makeBoundsInit(alloca, loc, block, allocaDecl, origType, + /*isInitSection=*/true); } + ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - mlir::Block *block = - createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/false); - builder.setInsertionPointToEnd(&recipe.getCopyRegion().back()); + const VarDecl *allocaDecl, const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds) { + mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, + numBounds, /*isInit=*/false); + builder.setInsertionPointToEnd(©Region.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument fromArg = block->getArgument(0); - mlir::BlockArgument toArg = block->getArgument(1); + mlir::Value fromArg = block->getArgument(0); + mlir::Value toArg = block->getArgument(1); - mlir::Type elementTy = - mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); + + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(fromArg, toArg) = + createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); - // Set the address of the emission to be the argument, so that we initialize - // that instead of the variable in the other block. + // Set up the 'to' address. + mlir::Type elementTy = + mlir::cast<cir::PointerType>(toArg.getType()).getPointee(); + CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); + tempDeclEmission.emittedAsOffload = true; tempDeclEmission.setAllocatedAddress( - Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); - tempDeclEmission.EmittedAsOffload = true; + Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); + // Set up the 'from' address from the temporary. CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; cgf.setAddrOfLocalVar( temporary, - Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); - + Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); + + builder.setInsertionPointToEnd(©Region.back()); + ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } + // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but // doesn't restore it aftewards. void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - mlir::acc::ReductionRecipeOp recipe) { - mlir::Block *block = builder.createBlock( - &recipe.getCombinerRegion(), recipe.getCombinerRegion().end(), - {mainOp.getType(), mainOp.getType()}, {loc, loc}); + mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, + llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) { + mlir::Block *block = + createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc, + numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument lhsArg = block->getArgument(0); + mlir::Value lhsArg = block->getArgument(0); + mlir::Value rhsArg = block->getArgument(1); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); + + if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) { + cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated"); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); + return; + } + + // apply the bounds so that we can get our bounds emitted correctly. + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(lhsArg, rhsArg) = + createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false); + + // Emitter for when we know this isn't a struct or array we have to loop + // through. This should work for the 'field' once the get-element call has + // been made. + auto emitSingleCombiner = + [&](mlir::Value lhsArg, mlir::Value rhsArg, + const OpenACCReductionRecipe::CombinerRecipe &combiner) { + mlir::Type elementTy = + mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee(); + CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS}; + cgf.setAddrOfLocalVar( + combiner.LHS, Address{lhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.LHS)}); + CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS}; + cgf.setAddrOfLocalVar( + combiner.RHS, Address{rhsArg, elementTy, + cgf.getContext().getDeclAlign(combiner.RHS)}); + + [[maybe_unused]] mlir::LogicalResult stmtRes = + cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true); + }; + + // Emitter for when we know this is either a non-array or element of an array + // (which also shouldn't be an array type?). This function should generate the + // initialization code for an entire 'array-element'/non-array, including + // diving into each element of a struct (if necessary). + auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) { + assert(!ty->isArrayType() && "Array type shouldn't get here"); + if (const auto *rd = ty->getAsRecordDecl()) { + if (combinerRecipes.size() == 1 && + cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) { + // If this is a 'top level' operator on the type we can just emit this + // as a simple one. + emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); + } else { + // else we have to handle each individual field after after a + // get-element. + for (const auto &[field, combiner] : + llvm::zip_equal(rd->fields(), combinerRecipes)) { + mlir::Type fieldType = cgf.convertType(field->getType()); + auto fieldPtr = cir::PointerType::get(fieldType); + + mlir::Value lhsField = builder.createGetMember( + loc, fieldPtr, lhsArg, field->getName(), field->getFieldIndex()); + mlir::Value rhsField = builder.createGetMember( + loc, fieldPtr, rhsArg, field->getName(), field->getFieldIndex()); + + emitSingleCombiner(lhsField, rhsField, combiner); + } + } + + } else { + // if this is a single-thing (because we should know this isn't an array, + // as Sema wouldn't let us get here), we can just do a normal emit call. + emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); + } + }; + + if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) { + // If we're in an array, we have to emit the combiner for each element of + // the array. + auto itrTy = mlir::cast<cir::IntType>(cgf.PtrDiffTy); + auto itrPtrTy = cir::PointerType::get(itrTy); - mlir::acc::YieldOp::create(builder, locEnd, lhsArg); + mlir::Value zero = + builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), 0); + mlir::Value itr = + cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", + cgf.cgm.getSize(cgf.getPointerAlign())); + builder.CIRBaseBuilderTy::createStore(loc, zero, itr); + + builder.setInsertionPointAfter(builder.createFor( + loc, + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + mlir::Value arraySize = builder.getConstInt( + loc, mlir::cast<cir::IntType>(cgf.PtrDiffTy), cat->getZExtSize()); + auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr, + arraySize); + builder.createCondition(cmp); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto lhsElt = builder.getArrayElement( + loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + auto rhsElt = builder.getArrayElement( + loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr, + /*shouldDecay=*/true); + + emitCombiner(lhsElt, rhsElt, cat->getElementType()); + builder.createYield(loc); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto loadItr = cir::LoadOp::create(builder, loc, {itr}); + auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(), + cir::UnaryOpKind::Inc, loadItr); + builder.CIRBaseBuilderTy::createStore(loc, inc, itr); + builder.createYield(loc); + })); + + } else if (origType->isArrayType()) { + cgf.cgm.errorNYI(loc, + "OpenACC Reduction combiner non-constant array recipe"); + } else { + emitCombiner(lhsArg, rhsArg, origType); + } + + builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); + ls.forceCleanup(); + mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); } } // namespace clang::CIRGen |
