//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Helperes to emit OpenACC clause recipes as CIR code. // //===----------------------------------------------------------------------===// #include #include "CIRGenOpenACCRecipe.h" namespace clang::CIRGen { mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region ®ion, mlir::Type opTy, mlir::Location loc, size_t numBounds, bool isInit) { llvm::SmallVector types; types.reserve(numBounds + 2); types.push_back(opTy); // The init section is the only one that doesn't have TWO copies of the // operation-type. Copy has a to/from, and destroy has a // 'reference'/'privatized' copy version. if (!isInit) types.push_back(opTy); auto boundsTy = mlir::acc::DataBoundsType::get(&cgf.getMLIRContext()); for (size_t i = 0; i < numBounds; ++i) types.push_back(boundsTy); llvm::SmallVector locs{types.size(), loc}; return builder.createBlock(®ion, region.end(), types, locs); } void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc, mlir::Type copyType, mlir::Value numEltsToCopy, mlir::Value offsetPerSubarray, mlir::Value destAlloca, mlir::Value srcAlloca) { mlir::OpBuilder::InsertionGuard guardCase(builder); mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); auto itrPtrTy = cir::PointerType::get(itrTy); mlir::IntegerAttr itrAlign = cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars( cgf.getContext().UnsignedLongLongTy)); auto loopBuilder = [&]() { auto itr = cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", itrAlign); cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); builder.CIRBaseBuilderTy::createStore(loc, constZero, itr); builder.createFor( loc, /*condBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { // itr < numEltsToCopy // Enforce a trip count of 1 if there wasn't any element count, this // way we can just use this loop with a constant bounds instead of a // separate code path. if (!numEltsToCopy) numEltsToCopy = builder.getConstInt(loc, itrTy, 1); auto loadCur = cir::LoadOp::create(builder, loc, {itr}); auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadCur, numEltsToCopy); builder.createCondition(cmp); }, /*bodyBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { // destAlloca[itr] = srcAlloca[offsetPerSubArray * itr]; auto loadCur = cir::LoadOp::create(builder, loc, {itr}); auto srcOffset = builder.createMul(loc, offsetPerSubarray, loadCur); auto ptrToOffsetIntoSrc = cir::PtrStrideOp::create( builder, loc, copyType, srcAlloca, srcOffset); auto offsetIntoDecayDest = cir::PtrStrideOp::create( builder, loc, builder.getPointerTo(copyType), destAlloca, loadCur); builder.CIRBaseBuilderTy::createStore(loc, ptrToOffsetIntoSrc, offsetIntoDecayDest); builder.createYield(loc); }, /*stepBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { // Simple increment of the iterator. auto load = cir::LoadOp::create(builder, loc, {itr}); auto inc = cir::UnaryOp::create(builder, loc, load.getType(), cir::UnaryOpKind::Inc, load); builder.CIRBaseBuilderTy::createStore(loc, inc, itr); builder.createYield(loc); }); }; cir::ScopeOp::create(builder, loc, [&](mlir::OpBuilder &b, mlir::Location loc) { loopBuilder(); builder.createYield(loc); }); } mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( mlir::Block *block, SourceRange exprRange, mlir::Location loc, std::string_view allocaName, size_t numBounds, llvm::ArrayRef boundTypes) { mlir::OpBuilder::InsertionGuard guardCase(builder); // Get the range of bounds arguments, which are all but the 1st arg. llvm::ArrayRef boundsRange = block->getArguments().drop_front(1); // boundTypes contains the before and after of each bounds, so it ends up // having 1 extra. Assert this is the case to ensure we don't call this in the // wrong 'block'. assert(boundsRange.size() + 1 == boundTypes.size()); mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); auto idxType = mlir::IndexType::get(&cgf.getMLIRContext()); auto getUpperBound = [&](mlir::Value bound) { auto upperBoundVal = mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy, upperBoundVal.getResult()) .getResult(0); }; auto isArrayTy = [&](QualType ty) { if (ty->isArrayType() && !ty->isConstantArrayType()) cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs"); return ty->isConstantArrayType(); }; mlir::Type topLevelTy = cgf.convertType(boundTypes.back()); cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy); // Do an alloca for the 'top' level type without bounds. mlir::Value initialAlloca = builder.createAlloca( loc, topLevelTyPtr, topLevelTy, allocaName, cgf.getContext().getTypeAlignInChars(boundTypes.back())); bool lastBoundWasArray = isArrayTy(boundTypes.back()); // Make sure we track a moving version of this so we can get our // 'copying' back to correct. mlir::Value lastAlloca = initialAlloca; // Since we're iterating the types in reverse, this sets up for each index // corresponding to the boundsRange to be the 'after application of the // bounds. llvm::ArrayRef boundResults = boundTypes.drop_back(1); // Collect the 'do we have any allocas needed after this type' list. llvm::SmallVector allocasLeftArr; llvm::ArrayRef resultTypes = boundTypes.drop_front(); std::transform_inclusive_scan( resultTypes.begin(), resultTypes.end(), std::back_inserter(allocasLeftArr), std::plus{}, [](QualType ty) { return !ty->isConstantArrayType(); }, false); // Keep track of the number of 'elements' that we're allocating. Individual // allocas should multiply this by the size of its current allocation. mlir::Value cumulativeElts; for (auto [bound, resultType, allocasLeft] : llvm::reverse( llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) { // if there is no further 'alloca' operation we need to do, we can skip // creating the UB/multiplications/etc. if (!allocasLeft) break; // First: figure out the number of elements in the current 'bound' list. mlir::Value eltsPerSubArray = getUpperBound(bound); mlir::Value eltsToAlloca; // IF we are in a sub-bounds, the total number of elements to alloca is // the product of that one and the current 'bounds' size. That is, // arr[5][5], we would need 25 elements, not just 5. Else it is just the // current number of elements. if (cumulativeElts) eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts); else eltsToAlloca = eltsPerSubArray; if (!lastBoundWasArray) { // If we have to do an allocation, figure out the size of the // allocation. alloca takes the number of bytes, not elements. TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType); cir::ConstantOp eltSize = builder.getConstInt( loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity()); mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize); mlir::Type eltTy = cgf.convertType(resultType); cir::PointerType ptrTy = builder.getPointerTo(eltTy); mlir::Value curAlloca = builder.createAlloca( loc, ptrTy, eltTy, "openacc.init.bounds", cgf.getContext().getTypeAlignInChars(resultType), curSize); makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca, curAlloca); lastAlloca = curAlloca; } else { // In the case of an array, we just need to decay the pointer, so just do // a zero-offset stride on the last alloca to decay it down an array // level. cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); lastAlloca = builder.getArrayElement(loc, loc, lastAlloca, cgf.convertType(resultType), constZero, /*shouldDecay=*/true); } cumulativeElts = eltsToAlloca; lastBoundWasArray = isArrayTy(resultType); } return initialAlloca; } std::pair OpenACCRecipeBuilderBase::createBoundsLoop( mlir::Value subscriptedValue, mlir::Value subscriptedValue2, mlir::Value bound, mlir::Location loc, bool inverse) { mlir::Operation *bodyInsertLoc; mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); auto itrPtrTy = cir::PointerType::get(itrTy); mlir::IntegerAttr itrAlign = cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars( cgf.getContext().UnsignedLongLongTy)); auto idxType = mlir::IndexType::get(&cgf.getMLIRContext()); auto doSubscriptOp = [&](mlir::Value subVal, cir::LoadOp idxLoad) -> mlir::Value { auto eltTy = cast(subVal.getType()).getPointee(); if (auto arrayTy = dyn_cast(eltTy)) return builder.getArrayElement(loc, loc, subVal, arrayTy.getElementType(), idxLoad, /*shouldDecay=*/true); assert(isa(eltTy)); auto eltLoad = cir::LoadOp::create(builder, loc, {subVal}); return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, idxLoad); }; auto forStmtBuilder = [&]() { // get the lower and upper bound for iterating over. auto lowerBoundVal = mlir::acc::GetLowerboundOp::create(builder, loc, idxType, bound); auto lbConversion = mlir::UnrealizedConversionCastOp::create( builder, loc, itrTy, lowerBoundVal.getResult()); auto upperBoundVal = mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); auto ubConversion = mlir::UnrealizedConversionCastOp::create( builder, loc, itrTy, upperBoundVal.getResult()); // Create a memory location for the iterator. auto itr = cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "iter", itrAlign); // Store to the iterator: either lower bound, or if inverse loop, upper // bound. if (inverse) { cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1); auto sub = cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub, ubConversion.getResult(0), constOne); // Upperbound is exclusive, so subtract 1. builder.CIRBaseBuilderTy::createStore(loc, sub, itr); } else { // Lowerbound is inclusive, so we can include it. builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0), itr); } // Save the 'end' iterator based on whether we are inverted or not. This // end iterator never changes, so we can just get it and convert it, so no // need to store/load/etc. auto endItr = inverse ? lbConversion : ubConversion; builder.createFor( loc, /*condBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto loadCur = cir::LoadOp::create(builder, loc, {itr}); // Use 'not equal' since we are just doing an increment/decrement. auto cmp = builder.createCompare( loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur, endItr.getResult(0)); builder.createCondition(cmp); }, /*bodyBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto load = cir::LoadOp::create(builder, loc, {itr}); if (subscriptedValue) subscriptedValue = doSubscriptOp(subscriptedValue, load); if (subscriptedValue2) subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); bodyInsertLoc = builder.createYield(loc); }, /*stepBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto load = cir::LoadOp::create(builder, loc, {itr}); auto unary = cir::UnaryOp::create( builder, loc, load.getType(), inverse ? cir::UnaryOpKind::Dec : cir::UnaryOpKind::Inc, load); builder.CIRBaseBuilderTy::createStore(loc, unary, itr); builder.createYield(loc); }); }; cir::ScopeOp::create(builder, loc, [&](mlir::OpBuilder &b, mlir::Location loc) { forStmtBuilder(); builder.createYield(loc); }); // Leave the insertion point to be inside the body, so we can loop over // these things. builder.setInsertionPoint(bodyInsertLoc); return {subscriptedValue, subscriptedValue2}; } mlir::acc::ReductionOperator OpenACCRecipeBuilderBase::convertReductionOp(OpenACCReductionOperator op) { switch (op) { case OpenACCReductionOperator::Addition: return mlir::acc::ReductionOperator::AccAdd; case OpenACCReductionOperator::Multiplication: return mlir::acc::ReductionOperator::AccMul; case OpenACCReductionOperator::Max: return mlir::acc::ReductionOperator::AccMax; case OpenACCReductionOperator::Min: return mlir::acc::ReductionOperator::AccMin; case OpenACCReductionOperator::BitwiseAnd: return mlir::acc::ReductionOperator::AccIand; case OpenACCReductionOperator::BitwiseOr: return mlir::acc::ReductionOperator::AccIor; case OpenACCReductionOperator::BitwiseXOr: return mlir::acc::ReductionOperator::AccXor; case OpenACCReductionOperator::And: return mlir::acc::ReductionOperator::AccLand; case OpenACCReductionOperator::Or: return mlir::acc::ReductionOperator::AccLor; case OpenACCReductionOperator::Invalid: llvm_unreachable("invalid reduction operator"); } llvm_unreachable("invalid reduction operator"); } // This function generates the 'destroy' section for a recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'destroy' section of the recipe, but // doesn't restore it aftewards. void OpenACCRecipeBuilderBase::createRecipeDestroySection( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, CharUnits alignment, QualType origType, size_t numBounds, QualType baseType, mlir::Region &destroyRegion) { mlir::Block *block = createRecipeBlock(destroyRegion, mainOp.getType(), loc, numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(&destroyRegion.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); mlir::Type elementTy = mlir::cast(mainOp.getType()).getPointee(); auto emitDestroy = [&](mlir::Value var, mlir::Type ty) { Address addr{var, ty, alignment}; cgf.emitDestroy(addr, origType, cgf.getDestroyer(QualType::DK_cxx_destructor)); }; if (numBounds) { mlir::OpBuilder::InsertionGuard guardCase(builder); // Get the range of bounds arguments, which are all but the 1st 2. 1st is // a 'reference', 2nd is the 'private' variant we need to destroy from. llvm::MutableArrayRef boundsRange = block->getArguments().drop_front(2); mlir::Value subscriptedValue = block->getArgument(1); for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc, /*inverse=*/true); emitDestroy(subscriptedValue, cgf.cgm.convertType(origType)); } else { // If we don't have any bounds, we can just destroy the variable directly. // The destroy region has a signature of "original item, privatized item". // So the 2nd item is the one that needs destroying, the former is just // for reference and we don't really have a need for it at the moment. emitDestroy(block->getArgument(1), elementTy); } ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } void OpenACCRecipeBuilderBase::makeBoundsInit( mlir::Value alloca, mlir::Location loc, mlir::Block *block, const VarDecl *allocaDecl, QualType origType, bool isInitSection) { mlir::OpBuilder::InsertionGuard guardCase(builder); builder.setInsertionPointToEnd(block); CIRGenFunction::LexicalScope ls(cgf, loc, block); CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl}; tempDeclEmission.emittedAsOffload = true; // The init section is the only one of the handful that only has a single // argument for the 'type', so we have to drop 1 for init, and future calls // to this will need to drop 2. llvm::MutableArrayRef boundsRange = block->getArguments().drop_front(isInitSection ? 1 : 2); mlir::Value subscriptedValue = alloca; for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc, /*inverse=*/false); tempDeclEmission.setAllocatedAddress( Address{subscriptedValue, cgf.convertType(origType), cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); } // TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we // probably need to do a little work about the 'init' calls to put it in 'copy' // region instead. void OpenACCRecipeBuilderBase::createInitRecipe( mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef boundTypes, const VarDecl *allocaDecl, QualType origType, bool emitInitExpr) { assert(allocaDecl && "Required recipe variable not set?"); CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(), loc, numBounds, /*isInit=*/true); builder.setInsertionPointToEnd(&recipeInitRegion.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); const Type *allocaPointeeType = allocaDecl->getType()->getPointeeOrArrayElementType(); // We are OK with no init for builtins, arrays of builtins, or pointers, // else we should NYI so we know to go look for these. if (cgf.getContext().getLangOpts().CPlusPlus && !allocaDecl->getInit() && !allocaDecl->getType()->isPointerType() && !allocaPointeeType->isBuiltinType() && !allocaPointeeType->isPointerType()) { // If we don't have any initialization recipe, we failed during Sema to // initialize this correctly. If we disable the // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll // emit an error to tell us. However, emitting those errors during // production is a violation of the standard, so we cannot do them. cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe"); } if (!numBounds) { // This is an 'easy' case, we just have to use the builtin init stuff to // initialize this variable correctly. CIRGenFunction::AutoVarEmission tempDeclEmission = cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); if (emitInitExpr) cgf.emitAutoVarInit(tempDeclEmission); } else { mlir::Value alloca = makeBoundsAlloca( block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); // If the initializer is trivial, there is nothing to do here, so save // ourselves some effort. if (emitInitExpr && allocaDecl->getInit() && (!cgf.isTrivialInitializer(allocaDecl->getInit()) || cgf.getContext().getLangOpts().getTrivialAutoVarInit() != LangOptions::TrivialAutoVarInitKind::Uninitialized)) makeBoundsInit(alloca, loc, block, allocaDecl, origType, /*isInitSection=*/true); } ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, const VarDecl *allocaDecl, const VarDecl *temporary, mlir::Region ©Region, size_t numBounds) { mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(©Region.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); mlir::Value fromArg = block->getArgument(0); mlir::Value toArg = block->getArgument(1); llvm::MutableArrayRef boundsRange = block->getArguments().drop_front(2); for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) std::tie(fromArg, toArg) = createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); // Set up the 'to' address. mlir::Type elementTy = mlir::cast(toArg.getType()).getPointee(); CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); tempDeclEmission.emittedAsOffload = true; tempDeclEmission.setAllocatedAddress( Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); // Set up the 'from' address from the temporary. CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; cgf.setAddrOfLocalVar( temporary, Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); builder.setInsertionPointToEnd(©Region.back()); ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd); } // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but // doesn't restore it aftewards. void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, llvm::ArrayRef combinerRecipes) { mlir::Block *block = createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc, numBounds, /*isInit=*/false); builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); mlir::Value lhsArg = block->getArgument(0); mlir::Value rhsArg = block->getArgument(1); llvm::MutableArrayRef boundsRange = block->getArguments().drop_front(2); if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) { cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated"); mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); return; } // apply the bounds so that we can get our bounds emitted correctly. for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) std::tie(lhsArg, rhsArg) = createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false); // Emitter for when we know this isn't a struct or array we have to loop // through. This should work for the 'field' once the get-element call has // been made. auto emitSingleCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, const OpenACCReductionRecipe::CombinerRecipe &combiner) { mlir::Type elementTy = mlir::cast(lhsArg.getType()).getPointee(); CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS}; cgf.setAddrOfLocalVar( combiner.LHS, Address{lhsArg, elementTy, cgf.getContext().getDeclAlign(combiner.LHS)}); CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS}; cgf.setAddrOfLocalVar( combiner.RHS, Address{rhsArg, elementTy, cgf.getContext().getDeclAlign(combiner.RHS)}); [[maybe_unused]] mlir::LogicalResult stmtRes = cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true); }; // Emitter for when we know this is either a non-array or element of an array // (which also shouldn't be an array type?). This function should generate the // initialization code for an entire 'array-element'/non-array, including // diving into each element of a struct (if necessary). auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) { assert(!ty->isArrayType() && "Array type shouldn't get here"); if (const auto *rd = ty->getAsRecordDecl()) { if (combinerRecipes.size() == 1 && cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) { // If this is a 'top level' operator on the type we can just emit this // as a simple one. emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); } else { // else we have to handle each individual field after after a // get-element. const CIRGenRecordLayout &layout = cgf.cgm.getTypes().getCIRGenRecordLayout(rd); for (const auto &[field, combiner] : llvm::zip_equal(rd->fields(), combinerRecipes)) { mlir::Type fieldType = cgf.convertType(field->getType()); auto fieldPtr = cir::PointerType::get(fieldType); unsigned fieldIndex = layout.getCIRFieldNo(field); mlir::Value lhsField = builder.createGetMember( loc, fieldPtr, lhsArg, field->getName(), fieldIndex); mlir::Value rhsField = builder.createGetMember( loc, fieldPtr, rhsArg, field->getName(), fieldIndex); emitSingleCombiner(lhsField, rhsField, combiner); } } } else { // if this is a single-thing (because we should know this isn't an array, // as Sema wouldn't let us get here), we can just do a normal emit call. emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); } }; if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) { // If we're in an array, we have to emit the combiner for each element of // the array. auto itrTy = mlir::cast(cgf.ptrDiffTy); auto itrPtrTy = cir::PointerType::get(itrTy); mlir::Value zero = builder.getConstInt(loc, mlir::cast(cgf.ptrDiffTy), 0); mlir::Value itr = cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", cgf.cgm.getSize(cgf.getPointerAlign())); builder.CIRBaseBuilderTy::createStore(loc, zero, itr); builder.setInsertionPointAfter(builder.createFor( loc, /*condBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto loadItr = cir::LoadOp::create(builder, loc, {itr}); mlir::Value arraySize = builder.getConstInt( loc, mlir::cast(cgf.ptrDiffTy), cat->getZExtSize()); auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr, arraySize); builder.createCondition(cmp); }, /*bodyBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto loadItr = cir::LoadOp::create(builder, loc, {itr}); auto lhsElt = builder.getArrayElement( loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr, /*shouldDecay=*/true); auto rhsElt = builder.getArrayElement( loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr, /*shouldDecay=*/true); emitCombiner(lhsElt, rhsElt, cat->getElementType()); builder.createYield(loc); }, /*stepBuilder=*/ [&](mlir::OpBuilder &b, mlir::Location loc) { auto loadItr = cir::LoadOp::create(builder, loc, {itr}); auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(), cir::UnaryOpKind::Inc, loadItr); builder.CIRBaseBuilderTy::createStore(loc, inc, itr); builder.createYield(loc); })); } else if (origType->isArrayType()) { cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner non-constant array recipe"); } else { emitCombiner(lhsArg, rhsArg, origType); } builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); ls.forceCleanup(); mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); } } // namespace clang::CIRGen